From e2a2ea68fcaa784b6dd42f91a84708e2484cc18e Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 21 Nov 2022 16:19:09 +0100 Subject: [PATCH] client: accommodate Consul 1.14.0 gRPC and agent self changes. (#15309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * client: accommodate Consul 1.14.0 gRPC and agent self changes. Consul 1.14.0 changed the way in which gRPC listeners are configured, particularly when using TLS. Prior to the change, a single listener was responsible for handling plain-text and encrypted gRPC requests. In 1.14.0 and beyond, separate listeners will be used for each, defaulting to 8502 and 8503 for plain-text and TLS respectively. The change means that Nomad’s Consul Connect integration would not work when integrated with Consul clusters using TLS and running 1.14.0 or greater. The Nomad Consul fingerprinter identifies the gRPC port Consul has exposed using the "DebugConfig.GRPCPort" value from Consul’s “/v1/agent/self” endpoint. In Consul 1.14.0 and greater, this only represents the plain-text gRPC port which is likely to be disbaled in clusters running TLS. In order to fix this issue, Nomad now takes into account the Consul version and configured scheme to optionally use “DebugConfig.GRPCTLSPort” value from Consul’s agent self return. The “consul_grcp_socket” allocrunner hook has also been updated so that the fingerprinted gRPC port attribute is passed in. This provides a better fallback method, when the operator does not configure the “consul.grpc_address” option. * docs: modify Consul Connect entries to detail 1.14.0 changes. * changelog: add entry for #15309 * fixup: tidy tests and clean version match from review feedback. * fixup: use strings tolower func. --- .changelog/15309.txt | 3 + client/allocrunner/alloc_runner_hooks.go | 2 +- client/allocrunner/consul_grpc_sock_hook.go | 43 ++++++++--- .../allocrunner/consul_grpc_sock_hook_test.go | 8 +- client/fingerprint/consul.go | 53 ++++++++++++- client/fingerprint/consul_test.go | 74 +++++++++++++++++-- website/content/docs/configuration/consul.mdx | 6 +- .../docs/integrations/consul-connect.mdx | 6 +- 8 files changed, 169 insertions(+), 26 deletions(-) create mode 100644 .changelog/15309.txt diff --git a/.changelog/15309.txt b/.changelog/15309.txt new file mode 100644 index 000000000..73621fea2 --- /dev/null +++ b/.changelog/15309.txt @@ -0,0 +1,3 @@ +```release-note:bug +fingerprint: Ensure Nomad can correctly fingerprint Consul gRPC where the Consul agent is running v1.14.0 or greater +``` diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 37d9b4a49..c2d69ca0e 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -161,7 +161,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { logger: hookLogger, shutdownDelayCtx: ar.shutdownDelayCtx, }), - newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), + newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig, config.Node.Attributes), newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID), newChecksHook(hookLogger, alloc, ar.checkStore, ar), diff --git a/client/allocrunner/consul_grpc_sock_hook.go b/client/allocrunner/consul_grpc_sock_hook.go index 4a7cf4e34..a9e7c2ff1 100644 --- a/client/allocrunner/consul_grpc_sock_hook.go +++ b/client/allocrunner/consul_grpc_sock_hook.go @@ -25,6 +25,11 @@ const ( // socketProxyStopWaitTime is the amount of time to wait for a socket proxy // to stop before assuming something went awry and return a timeout error. socketProxyStopWaitTime = 3 * time.Second + + // consulGRPCFallbackPort is the last resort fallback port to use in + // combination with the Consul HTTP config address when creating the + // socket. + consulGRPCFallbackPort = "8502" ) var ( @@ -45,10 +50,20 @@ type consulGRPCSocketHook struct { proxy *grpcSocketProxy } -func newConsulGRPCSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulGRPCSocketHook { +func newConsulGRPCSocketHook( + logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, + config *config.ConsulConfig, nodeAttrs map[string]string) *consulGRPCSocketHook { + + // Attempt to find the gRPC port via the node attributes, otherwise use the + // default fallback. + consulGRPCPort, ok := nodeAttrs["consul.grpc"] + if !ok { + consulGRPCPort = consulGRPCFallbackPort + } + return &consulGRPCSocketHook{ alloc: alloc, - proxy: newGRPCSocketProxy(logger, allocDir, config), + proxy: newGRPCSocketProxy(logger, allocDir, config, consulGRPCPort), logger: logger.Named(consulGRPCSockHookName), } } @@ -119,21 +134,29 @@ type grpcSocketProxy struct { allocDir *allocdir.AllocDir config *config.ConsulConfig + // consulGRPCFallbackPort is the port to use if the operator did not + // specify a gRPC config address. + consulGRPCFallbackPort string + ctx context.Context cancel func() doneCh chan struct{} runOnce bool } -func newGRPCSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *grpcSocketProxy { +func newGRPCSocketProxy( + logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig, + consulGRPCFallbackPort string) *grpcSocketProxy { + ctx, cancel := context.WithCancel(context.Background()) return &grpcSocketProxy{ - allocDir: allocDir, - config: config, - ctx: ctx, - cancel: cancel, - doneCh: make(chan struct{}), - logger: logger, + allocDir: allocDir, + config: config, + consulGRPCFallbackPort: consulGRPCFallbackPort, + ctx: ctx, + cancel: cancel, + doneCh: make(chan struct{}), + logger: logger, } } @@ -173,7 +196,7 @@ func (p *grpcSocketProxy) run(alloc *structs.Allocation) error { p.config.Addr, err) } - destAddr = net.JoinHostPort(host, "8502") + destAddr = net.JoinHostPort(host, p.consulGRPCFallbackPort) } hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket) diff --git a/client/allocrunner/consul_grpc_sock_hook_test.go b/client/allocrunner/consul_grpc_sock_hook_test.go index 9f7855ddf..6e8e1ed06 100644 --- a/client/allocrunner/consul_grpc_sock_hook_test.go +++ b/client/allocrunner/consul_grpc_sock_hook_test.go @@ -42,7 +42,7 @@ func TestConsulGRPCSocketHook_PrerunPostrun_Ok(t *testing.T) { defer cleanup() // Start the unix socket proxy - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) gRPCSock := filepath.Join(allocDir.AllocDir, allocdir.AllocGRPCSocket) @@ -116,7 +116,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc without a Connect proxy sidecar should not return // an error. - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) // Postrun should be a noop @@ -126,7 +126,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc *with* a Connect proxy sidecar *should* return an error // when Consul is not configured. - h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig, map[string]string{}) require.EqualError(t, h.Prerun(), "consul address must be set on nomad client") // Postrun should be a noop @@ -136,7 +136,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // Updating an alloc without a sidecar to have a sidecar should // error when the sidecar is added. - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{ diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index e1b2b449d..15f8ce25e 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -3,10 +3,12 @@ package fingerprint import ( "fmt" "strconv" + "strings" "time" consulapi "github.com/hashicorp/consul/api" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" agentconsul "github.com/hashicorp/nomad/command/agent/consul" ) @@ -15,6 +17,14 @@ const ( consulUnavailable = "unavailable" ) +var ( + // consulGRPCPortChangeVersion is the Consul version which made a breaking + // change to the way gRPC API listeners are created. This means Nomad must + // perform different fingerprinting depending on which version of Consul it + // is communicating with. + consulGRPCPortChangeVersion = version.Must(version.NewVersion("1.14.0")) +) + // ConsulFingerprint is used to fingerprint for Consul type ConsulFingerprint struct { logger log.Logger @@ -97,7 +107,7 @@ func (f *ConsulFingerprint) initialize(req *FingerprintRequest) error { "consul.datacenter": f.dc, "consul.segment": f.segment, "consul.connect": f.connect, - "consul.grpc": f.grpc, + "consul.grpc": f.grpc(consulConfig.Scheme), "consul.ft.namespaces": f.namespaces, } } @@ -173,11 +183,50 @@ func (f *ConsulFingerprint) connect(info agentconsul.Self) (string, bool) { return strconv.FormatBool(c), ok } -func (f *ConsulFingerprint) grpc(info agentconsul.Self) (string, bool) { +func (f *ConsulFingerprint) grpc(scheme string) func(info agentconsul.Self) (string, bool) { + return func(info agentconsul.Self) (string, bool) { + + // The version is needed in order to understand which config object to + // query. This is because Consul 1.14.0 added a new gRPC port which + // broke the previous behaviour. + v, ok := info["Config"]["Version"].(string) + if !ok { + return "", false + } + + consulVersion, err := version.NewVersion(v) + if err != nil { + return "", false + } + + // If the Consul agent being fingerprinted is running a version less + // than 1.14.0 we use the original single gRPC port. + if consulVersion.Core().LessThan(consulGRPCPortChangeVersion.Core()) { + return f.grpcPort(info) + } + + // Now that we know we are querying a Consul agent running v1.14.0 or + // greater, we need to select the correct port parameter from the + // config depending on whether we have been asked to speak TLS or not. + switch strings.ToLower(scheme) { + case "https": + return f.grpcTLSPort(info) + default: + return f.grpcPort(info) + } + } +} + +func (f *ConsulFingerprint) grpcPort(info agentconsul.Self) (string, bool) { p, ok := info["DebugConfig"]["GRPCPort"].(float64) return fmt.Sprintf("%d", int(p)), ok } +func (f *ConsulFingerprint) grpcTLSPort(info agentconsul.Self) (string, bool) { + p, ok := info["DebugConfig"]["GRPCTLSPort"].(float64) + return fmt.Sprintf("%d", int(p)), ok +} + func (f *ConsulFingerprint) namespaces(info agentconsul.Self) (string, bool) { return strconv.FormatBool(agentconsul.Namespaces(info)), true } diff --git a/client/fingerprint/consul_test.go b/client/fingerprint/consul_test.go index b8b322fb0..00d6ace47 100644 --- a/client/fingerprint/consul_test.go +++ b/client/fingerprint/consul_test.go @@ -296,29 +296,91 @@ func TestConsulFingerprint_grpc(t *testing.T) { fp := newConsulFingerPrint(t) - t.Run("grpc set", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats }) require.True(t, ok) require.Equal(t, "8502", s) }) - t.Run("grpc disabled", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc disabled pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats }) require.True(t, ok) require.Equal(t, "-1", s) }) - t.Run("grpc missing", func(t *testing.T) { - _, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc disabled post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": 8503.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8503", s) + }) + + t.Run("grpc missing http", func(t *testing.T) { + _, ok := fp.grpc("http")(agentconsul.Self{ "DebugConfig": {}, }) require.False(t, ok) }) + t.Run("grpc missing https", func(t *testing.T) { + _, ok := fp.grpc("https")(agentconsul.Self{ + "DebugConfig": {}, + }) + require.False(t, ok) + }) } func TestConsulFingerprint_namespaces(t *testing.T) { diff --git a/website/content/docs/configuration/consul.mdx b/website/content/docs/configuration/consul.mdx index 6858a2d5e..3c3fc3d5c 100644 --- a/website/content/docs/configuration/consul.mdx +++ b/website/content/docs/configuration/consul.mdx @@ -90,8 +90,8 @@ configuring Nomad to talk to Consul via DNS such as consul.service.consul - `grpc_address` `(string: "127.0.0.1:8502")` - Specifies the address to the local Consul agent for `gRPC` requests, given in the format `host:port`. Note that - Consul does not enable the [`grpc`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#_grpc_port) - listener by default. + Consul does not enable the [`grpc`][grpc_port] or [`grpc_tls`][grpctls_port] + listeners by default. - `key_file` `(string: "")` - Specifies the path to the private key used for Consul communication. If this is set then you need to also set `cert_file`. @@ -227,3 +227,5 @@ namespace "nomad-ns" { [consul]: https://www.consul.io/ 'Consul by HashiCorp' [bootstrap]: https://learn.hashicorp.com/tutorials/nomad/clustering 'Automatic Bootstrapping' [go-sockaddr/template]: https://pkg.go.dev/github.com/hashicorp/go-sockaddr/template +[grpc_port]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#grpc_port +[grpctls_port]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#grpc_tls_port diff --git a/website/content/docs/integrations/consul-connect.mdx b/website/content/docs/integrations/consul-connect.mdx index 75e314c02..ea8490d4a 100644 --- a/website/content/docs/integrations/consul-connect.mdx +++ b/website/content/docs/integrations/consul-connect.mdx @@ -63,7 +63,10 @@ $ consul agent -dev To use service mesh on a non-dev Consul agent, you will minimally need to enable the GRPC port and set `connect` to enabled by adding some additional information to -your Consul client configurations, depending on format. +your Consul client configurations, depending on format. Consul agents running TLS +and a version greater than [1.14.0](https://releases.hashicorp.com/consul/1.14.0) +should set the `grpc_tls` configuration parameter instead of `grpc`. Please see +the Consul [port documentation](consul_ports) for further reference material. For HCL configurations: @@ -352,3 +355,4 @@ dashes (`-`) are converted to underscores (`_`) in environment variables so [gh-9907]: https://github.com/hashicorp/nomad/issues/9907 [`Local`]: https://developer.hashicorp.com/consul/docs/security/acl/acl-tokens#token-attributes [anon_token]: https://developer.hashicorp.com/consul/docs/security/acl/acl-tokens#special-purpose-tokens +[consul_ports]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#ports