Rate limit improvements and fixes (#15917)
- Fixes a panic when Operation.SourceAddr is nil (internal net/rpc calls) - Adds proper HTTP response codes (429 and 503) for rate limit errors - Makes the error messages clearer - Enables automatic retries for rate-limit errors in the net/rpc stack
This commit is contained in:
parent
2a90faa4b1
commit
f24f8c681f
|
@ -19,14 +19,14 @@ var (
|
|||
// rate limit was exhausted, but may succeed on a different server.
|
||||
//
|
||||
// Results in a RESOURCE_EXHAUSTED or "429 Too Many Requests" response.
|
||||
ErrRetryElsewhere = errors.New("rate limit exceeded, try a different server")
|
||||
ErrRetryElsewhere = errors.New("rate limit exceeded, try again later or against a different server")
|
||||
|
||||
// ErrRetryLater indicates that the operation was not allowed because the rate
|
||||
// limit was exhausted, and trying a different server won't help (e.g. because
|
||||
// the operation can only be performed on the leader).
|
||||
//
|
||||
// Results in an UNAVAILABLE or "503 Service Unavailable" response.
|
||||
ErrRetryLater = errors.New("rate limit exceeded, try again later")
|
||||
ErrRetryLater = errors.New("rate limit exceeded for operation that can only be performed by the leader, try again later")
|
||||
)
|
||||
|
||||
// Mode determines the action that will be taken when a rate limit has been
|
||||
|
@ -209,7 +209,7 @@ func (h *Handler) Allow(op Operation) error {
|
|||
enforced := l.mode == ModeEnforcing
|
||||
h.logger.Trace("RPC exceeded allowed rate limit",
|
||||
"rpc", op.Name,
|
||||
"source_addr", op.SourceAddr.String(),
|
||||
"source_addr", op.SourceAddr,
|
||||
"limit_type", l.desc,
|
||||
"limit_enforced", enforced,
|
||||
)
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
msgpackrpc "github.com/hashicorp/consul-net-rpc/net-rpc-msgpackrpc"
|
||||
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/rate"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/consul/wanfed"
|
||||
"github.com/hashicorp/consul/agent/metadata"
|
||||
|
@ -570,9 +571,19 @@ func canRetry(info structs.RPCInfo, err error, start time.Time, config *Config)
|
|||
return true
|
||||
}
|
||||
|
||||
// If we are chunking and it doesn't seem to have completed, try again.
|
||||
if err != nil && strings.Contains(err.Error(), ErrChunkingResubmit.Error()) {
|
||||
return true
|
||||
retryableMessages := []error{
|
||||
// If we are chunking and it doesn't seem to have completed, try again.
|
||||
ErrChunkingResubmit,
|
||||
|
||||
// These rate limit errors are returned before the handler is called, so are
|
||||
// safe to retry.
|
||||
rate.ErrRetryElsewhere,
|
||||
rate.ErrRetryLater,
|
||||
}
|
||||
for _, m := range retryableMessages {
|
||||
if err != nil && strings.Contains(err.Error(), m.Error()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Reads are safe to retry for stream errors, such as if a server was
|
||||
|
|
|
@ -30,6 +30,7 @@ import (
|
|||
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/connect"
|
||||
"github.com/hashicorp/consul/agent/consul/rate"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
agent_grpc "github.com/hashicorp/consul/agent/grpc-internal"
|
||||
"github.com/hashicorp/consul/agent/pool"
|
||||
|
@ -1310,6 +1311,16 @@ func TestCanRetry(t *testing.T) {
|
|||
err: fmt.Errorf("some wrapping: %w", structs.ErrNoLeader),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "ErrRetryElsewhere",
|
||||
err: fmt.Errorf("some wrapping: %w", rate.ErrRetryElsewhere),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "ErrRetryLater",
|
||||
err: fmt.Errorf("some wrapping: %w", rate.ErrRetryLater),
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "EOF on read request",
|
||||
req: isReadRequest{},
|
||||
|
|
|
@ -29,6 +29,7 @@ import (
|
|||
"github.com/hashicorp/consul/agent/cache"
|
||||
"github.com/hashicorp/consul/agent/config"
|
||||
"github.com/hashicorp/consul/agent/consul"
|
||||
"github.com/hashicorp/consul/agent/consul/rate"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/uiserver"
|
||||
"github.com/hashicorp/consul/api"
|
||||
|
@ -393,16 +394,49 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
|
|||
return false
|
||||
}
|
||||
|
||||
isTooManyRequests := func(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Client-side RPC limits.
|
||||
if structs.IsErrRPCRateExceeded(err) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Connect CA rate limiter.
|
||||
if err.Error() == consul.ErrRateLimited.Error() {
|
||||
return true
|
||||
}
|
||||
|
||||
// gRPC server rate limit interceptor.
|
||||
if status.Code(err) == codes.ResourceExhausted {
|
||||
return true
|
||||
}
|
||||
|
||||
// net/rpc server rate limit interceptor.
|
||||
return strings.Contains(err.Error(), rate.ErrRetryElsewhere.Error())
|
||||
}
|
||||
|
||||
isServiceUnavailable := func(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// gRPC server rate limit interceptor.
|
||||
if status.Code(err) == codes.Unavailable {
|
||||
return true
|
||||
}
|
||||
|
||||
// net/rpc server rate limit interceptor.
|
||||
return strings.Contains(err.Error(), rate.ErrRetryLater.Error())
|
||||
}
|
||||
|
||||
isMethodNotAllowed := func(err error) bool {
|
||||
_, ok := err.(MethodNotAllowedError)
|
||||
return ok
|
||||
}
|
||||
|
||||
isTooManyRequests := func(err error) bool {
|
||||
// Sadness net/rpc can't do nice typed errors so this is all we got
|
||||
return err.Error() == consul.ErrRateLimited.Error()
|
||||
}
|
||||
|
||||
addAllowHeader := func(methods []string) {
|
||||
resp.Header().Add("Allow", strings.Join(methods, ","))
|
||||
}
|
||||
|
@ -427,12 +461,19 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
|
|||
"error", err)
|
||||
}
|
||||
|
||||
// If the error came from gRPC, unpack it to get the real message.
|
||||
msg := err.Error()
|
||||
if s, ok := status.FromError(err); ok {
|
||||
msg = s.Message()
|
||||
}
|
||||
|
||||
switch {
|
||||
case isForbidden(err):
|
||||
resp.WriteHeader(http.StatusForbidden)
|
||||
fmt.Fprint(resp, err.Error())
|
||||
case structs.IsErrRPCRateExceeded(err):
|
||||
case isTooManyRequests(err):
|
||||
resp.WriteHeader(http.StatusTooManyRequests)
|
||||
case isServiceUnavailable(err):
|
||||
resp.WriteHeader(http.StatusServiceUnavailable)
|
||||
case isMethodNotAllowed(err):
|
||||
// RFC2616 states that for 405 Method Not Allowed the response
|
||||
// MUST include an Allow header containing the list of valid
|
||||
|
@ -440,26 +481,21 @@ func (s *HTTPHandlers) wrap(handler endpoint, methods []string) http.HandlerFunc
|
|||
// https://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
|
||||
addAllowHeader(err.(MethodNotAllowedError).Allow)
|
||||
resp.WriteHeader(http.StatusMethodNotAllowed) // 405
|
||||
fmt.Fprint(resp, err.Error())
|
||||
case isHTTPError(err):
|
||||
err := err.(HTTPError)
|
||||
code := http.StatusInternalServerError
|
||||
if err.StatusCode != 0 {
|
||||
code = err.StatusCode
|
||||
}
|
||||
reason := "An unexpected error occurred"
|
||||
if err.Error() != "" {
|
||||
reason = err.Error()
|
||||
if msg == "" {
|
||||
msg = "An unexpected error occurred"
|
||||
}
|
||||
resp.WriteHeader(code)
|
||||
fmt.Fprint(resp, reason)
|
||||
case isTooManyRequests(err):
|
||||
resp.WriteHeader(http.StatusTooManyRequests)
|
||||
fmt.Fprint(resp, err.Error())
|
||||
default:
|
||||
resp.WriteHeader(http.StatusInternalServerError)
|
||||
fmt.Fprint(resp, err.Error())
|
||||
}
|
||||
|
||||
fmt.Fprint(resp, msg)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
|
|
Loading…
Reference in New Issue