Return 429 response on HTTP max connection limit (#13621)
Return 429 response on HTTP max connection limit. Instead of silently closing the connection, return a `429 Too Many Requests` HTTP response with a helpful error message to aid debugging when the connection limit is unintentionally reached. Set a 10-millisecond write timeout and rate limiter for connection-limit 429 response to prevent writing the HTTP response from consuming too many server resources. Add `nomad.agent.http.exceeded metric` counting the number of HTTP connections exceeding concurrency limit.
This commit is contained in:
parent
301c6e57f5
commit
5354409b1a
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:improvement
|
||||||
|
api: HTTP server now returns a 429 error code when hitting the connection limit
|
||||||
|
```
|
|
@ -14,6 +14,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics"
|
||||||
assetfs "github.com/elazarl/go-bindata-assetfs"
|
assetfs "github.com/elazarl/go-bindata-assetfs"
|
||||||
"github.com/gorilla/handlers"
|
"github.com/gorilla/handlers"
|
||||||
"github.com/gorilla/websocket"
|
"github.com/gorilla/websocket"
|
||||||
|
@ -22,6 +23,7 @@ import (
|
||||||
"github.com/hashicorp/go-msgpack/codec"
|
"github.com/hashicorp/go-msgpack/codec"
|
||||||
multierror "github.com/hashicorp/go-multierror"
|
multierror "github.com/hashicorp/go-multierror"
|
||||||
"github.com/rs/cors"
|
"github.com/rs/cors"
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
"github.com/hashicorp/nomad/acl"
|
"github.com/hashicorp/nomad/acl"
|
||||||
"github.com/hashicorp/nomad/helper/noxssrw"
|
"github.com/hashicorp/nomad/helper/noxssrw"
|
||||||
|
@ -156,7 +158,7 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) {
|
||||||
httpServer := http.Server{
|
httpServer := http.Server{
|
||||||
Addr: srv.Addr,
|
Addr: srv.Addr,
|
||||||
Handler: handlers.CompressHandler(srv.mux),
|
Handler: handlers.CompressHandler(srv.mux),
|
||||||
ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns),
|
ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns, srv.logger),
|
||||||
ErrorLog: newHTTPServerLogger(srv.logger),
|
ErrorLog: newHTTPServerLogger(srv.logger),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,13 +215,12 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) {
|
||||||
//
|
//
|
||||||
// If limit > 0, a per-address connection limit will be enabled regardless of
|
// If limit > 0, a per-address connection limit will be enabled regardless of
|
||||||
// TLS. If connLimit == 0 there is no connection limit.
|
// TLS. If connLimit == 0 there is no connection limit.
|
||||||
func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) func(conn net.Conn, state http.ConnState) {
|
func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) {
|
||||||
|
connLimiter := connLimiter(connLimit, logger)
|
||||||
if !isTLS || handshakeTimeout == 0 {
|
if !isTLS || handshakeTimeout == 0 {
|
||||||
if connLimit > 0 {
|
if connLimit > 0 {
|
||||||
// Still return the connection limiter
|
// Still return the connection limiter
|
||||||
return connlimit.NewLimiter(connlimit.Config{
|
return connLimiter
|
||||||
MaxConnsPerClientIP: connLimit,
|
|
||||||
}).HTTPConnStateFunc()
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -227,9 +228,6 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu
|
||||||
if connLimit > 0 {
|
if connLimit > 0 {
|
||||||
// Return conn state callback with connection limiting and a
|
// Return conn state callback with connection limiting and a
|
||||||
// handshake timeout.
|
// handshake timeout.
|
||||||
connLimiter := connlimit.NewLimiter(connlimit.Config{
|
|
||||||
MaxConnsPerClientIP: connLimit,
|
|
||||||
}).HTTPConnStateFunc()
|
|
||||||
|
|
||||||
return func(conn net.Conn, state http.ConnState) {
|
return func(conn net.Conn, state http.ConnState) {
|
||||||
switch state {
|
switch state {
|
||||||
|
@ -268,6 +266,35 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// connLimiter returns a connection-limiter function with a rate-limited 429-response error handler.
|
||||||
|
// The rate-limit prevents the TLS handshake necessary to write the HTTP response
|
||||||
|
// from consuming too many server resources.
|
||||||
|
func connLimiter(connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) {
|
||||||
|
// Global rate-limit of 10 responses per second with a 100-response burst.
|
||||||
|
limiter := rate.NewLimiter(10, 100)
|
||||||
|
|
||||||
|
tooManyConnsMsg := "Your IP is issuing too many concurrent connections, please rate limit your calls\n"
|
||||||
|
tooManyRequestsResponse := []byte(fmt.Sprintf("HTTP/1.1 429 Too Many Requests\r\n"+
|
||||||
|
"Content-Type: text/plain\r\n"+
|
||||||
|
"Content-Length: %d\r\n"+
|
||||||
|
"Connection: close\r\n\r\n%s", len(tooManyConnsMsg), tooManyConnsMsg))
|
||||||
|
return connlimit.NewLimiter(connlimit.Config{
|
||||||
|
MaxConnsPerClientIP: connLimit,
|
||||||
|
}).HTTPConnStateFuncWithErrorHandler(func(err error, conn net.Conn) {
|
||||||
|
if err == connlimit.ErrPerClientIPLimitReached {
|
||||||
|
metrics.IncrCounter([]string{"nomad", "agent", "http", "exceeded"}, 1)
|
||||||
|
if n := limiter.Reserve(); n.Delay() == 0 {
|
||||||
|
logger.Warn("Too many concurrent connections", "address", conn.RemoteAddr().String(), "limit", connLimit)
|
||||||
|
conn.SetDeadline(time.Now().Add(10 * time.Millisecond))
|
||||||
|
conn.Write(tooManyRequestsResponse)
|
||||||
|
} else {
|
||||||
|
n.Cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
conn.Close()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
|
// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
|
||||||
// connections. It's used by NewHttpServer so
|
// connections. It's used by NewHttpServer so
|
||||||
// dead TCP connections eventually go away.
|
// dead TCP connections eventually go away.
|
||||||
|
|
|
@ -1254,41 +1254,15 @@ func TestHTTPServer_Limits_OK(t *testing.T) {
|
||||||
// Create a new connection that will go over the connection limit.
|
// Create a new connection that will go over the connection limit.
|
||||||
limitConn, err := dial(t, addr, useTLS)
|
limitConn, err := dial(t, addr, useTLS)
|
||||||
|
|
||||||
// At this point, the state of the connection + handshake are up in the
|
response := "HTTP/1.1 429"
|
||||||
// air.
|
buf := make([]byte, len(response))
|
||||||
//
|
deadline := time.Now().Add(10 * time.Second)
|
||||||
// 1) dial failed, handshake never started
|
require.NoError(t, limitConn.SetReadDeadline(deadline))
|
||||||
// => Conn is nil + io.EOF
|
n, err := limitConn.Read(buf)
|
||||||
// 2) dial completed, handshake failed
|
require.Equal(t, response, string(buf))
|
||||||
// => Conn is malformed + (net.OpError OR io.EOF)
|
require.Nil(t, err)
|
||||||
// 3) dial completed, handshake succeeded
|
require.Equal(t, len(response), n)
|
||||||
// => Conn is not nil + no error, however using the Conn should
|
require.NoError(t, limitConn.Close())
|
||||||
// result in io.EOF
|
|
||||||
//
|
|
||||||
// At no point should Nomad actually write through the limited Conn.
|
|
||||||
|
|
||||||
if limitConn == nil || err != nil {
|
|
||||||
// Case 1 or Case 2 - returned Conn is useless and the error should
|
|
||||||
// be one of:
|
|
||||||
// "EOF"
|
|
||||||
// "closed network connection"
|
|
||||||
// "connection reset by peer"
|
|
||||||
msg := err.Error()
|
|
||||||
acceptable := strings.Contains(msg, "EOF") ||
|
|
||||||
strings.Contains(msg, "closed network connection") ||
|
|
||||||
strings.Contains(msg, "connection reset by peer")
|
|
||||||
require.True(t, acceptable)
|
|
||||||
} else {
|
|
||||||
// Case 3 - returned Conn is usable, but Nomad should not write
|
|
||||||
// anything before closing it.
|
|
||||||
buf := make([]byte, bufSize)
|
|
||||||
deadline := time.Now().Add(10 * time.Second)
|
|
||||||
require.NoError(t, limitConn.SetReadDeadline(deadline))
|
|
||||||
n, err := limitConn.Read(buf)
|
|
||||||
require.Equal(t, io.EOF, err)
|
|
||||||
require.Zero(t, n)
|
|
||||||
require.NoError(t, limitConn.Close())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assert existing connections are ok
|
// Assert existing connections are ok
|
||||||
require.Len(t, errCh, 0)
|
require.Len(t, errCh, 0)
|
||||||
|
|
|
@ -480,6 +480,14 @@ Raft database metrics are emitted by the `raft-boltdb` library.
|
||||||
| `nomad.raft.boltdb.txstats.write` | Count of total write operations | Integer | Counter |
|
| `nomad.raft.boltdb.txstats.write` | Count of total write operations | Integer | Counter |
|
||||||
| `nomad.raft.boltdb.txstats.writeTime` | Sample of write operation times | Nanoseconds | Summary |
|
| `nomad.raft.boltdb.txstats.writeTime` | Sample of write operation times | Nanoseconds | Summary |
|
||||||
|
|
||||||
|
## Agent Metrics
|
||||||
|
|
||||||
|
Agent metrics are emitted by all Nomad agents running in either client or server mode.
|
||||||
|
|
||||||
|
| Metric | Description | Unit | Type |
|
||||||
|
| ----------------------------------------- | ----------------------------------------------------- | ----------- | ------- |
|
||||||
|
| `nomad.agent.http.exceeded` | Count of HTTP connections exceeding concurrency limit | Integer | Counter |
|
||||||
|
|
||||||
[tagged-metrics]: /docs/telemetry/metrics#tagged-metrics
|
[tagged-metrics]: /docs/telemetry/metrics#tagged-metrics
|
||||||
[sticky]: /docs/job-specification/ephemeral_disk#sticky
|
[sticky]: /docs/job-specification/ephemeral_disk#sticky
|
||||||
[s_port_plan_failure]: /s/port-plan-failure
|
[s_port_plan_failure]: /s/port-plan-failure
|
||||||
|
|
Loading…
Reference in New Issue