diff --git a/.changelog/15794.txt b/.changelog/15794.txt new file mode 100644 index 000000000..8195ccce6 --- /dev/null +++ b/.changelog/15794.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: connect envoy command errors if grpc ports are not open +``` \ No newline at end of file diff --git a/command/connect/envoy/envoy.go b/command/connect/envoy/envoy.go index bd5be872e..9ee9523a0 100644 --- a/command/connect/envoy/envoy.go +++ b/command/connect/envoy/envoy.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "strings" + "time" "github.com/mitchellh/cli" "github.com/mitchellh/mapstructure" @@ -70,6 +71,8 @@ type cmd struct { gatewaySvcName string gatewayKind api.ServiceKind + + dialFunc func(network string, address string) (net.Conn, error) } const meshGatewayVal = "mesh" @@ -206,6 +209,10 @@ func (c *cmd) init() { flags.Merge(c.flags, c.http.ClientFlags()) flags.Merge(c.flags, c.http.MultiTenancyFlags()) c.help = flags.Usage(help, c.flags) + + c.dialFunc = func(network string, address string) (net.Conn, error) { + return net.DialTimeout(network, address, 3*time.Second) + } } // canBindInternal is here mainly so we can unit test this with a constant net.Addr list @@ -486,6 +493,10 @@ func (c *cmd) templateArgs() (*BootstrapTplArgs, error) { return nil, err } + if err := checkDial(xdsAddr, c.dialFunc); err != nil { + return nil, fmt.Errorf("error dialing xDS address: %w", err) + } + adminAddr, adminPort, err := net.SplitHostPort(c.adminBind) if err != nil { return nil, fmt.Errorf("Invalid Consul HTTP address: %s", err) @@ -657,14 +668,24 @@ func (c *cmd) xdsAddress() (GRPC, error) { addr := c.grpcAddr if addr == "" { + // This lookup is a UX optimization and requires acl policy agent:read, + // which sidecars may not have. port, protocol, err := c.lookupXDSPort() if err != nil { - c.UI.Error(fmt.Sprintf("Error connecting to Consul agent: %s", err)) + if strings.Contains(err.Error(), "Permission denied") { + // Token did not have agent:read. Log and proceed with defaults. + c.UI.Info(fmt.Sprintf("Could not query /v1/agent/self for xDS ports: %s", err)) + } else { + // If not a permission denied error, gRPC is explicitly disabled + // or something went fatally wrong. + return g, fmt.Errorf("Error looking up xDS port: %s", err) + } } if port <= 0 { // This is the dev mode default and recommended production setting if // enabled. port = 8502 + c.UI.Info("-grpc-addr not provided and unable to discover a gRPC address for xDS. Defaulting to localhost:8502") } addr = fmt.Sprintf("%vlocalhost:%v", protocol, port) } @@ -725,6 +746,9 @@ func (c *cmd) lookupXDSPort() (int, string, error) { var resp response if err := mapstructure.Decode(self, &resp); err == nil { + if resp.XDS.Ports.TLS < 0 && resp.XDS.Ports.Plaintext < 0 { + return 0, "", fmt.Errorf("agent has grpc disabled") + } if resp.XDS.Ports.TLS > 0 { return resp.XDS.Ports.TLS, "https://", nil } @@ -733,13 +757,13 @@ func (c *cmd) lookupXDSPort() (int, string, error) { } } - // Fallback to old API for the case where a new consul CLI is being used with - // an older API version. + // If above TLS and Plaintext ports are both 0, fallback to + // old API for the case where a new consul CLI is being used + // with an older API version. cfg, ok := self["DebugConfig"] if !ok { return 0, "", fmt.Errorf("unexpected agent response: no debug config") } - // TODO what does this mean? What did the old API look like? How does this affect compatibility? port, ok := cfg["GRPCPort"] if !ok { return 0, "", fmt.Errorf("agent does not have grpc port enabled") @@ -752,6 +776,25 @@ func (c *cmd) lookupXDSPort() (int, string, error) { return int(portN), "", nil } +func checkDial(g GRPC, dial func(string, string) (net.Conn, error)) error { + var ( + conn net.Conn + err error + ) + if g.AgentSocket != "" { + conn, err = dial("unix", g.AgentSocket) + } else { + conn, err = dial("tcp", fmt.Sprintf("%s:%s", g.AgentAddress, g.AgentPort)) + } + if err != nil { + return err + } + if conn != nil { + conn.Close() + } + return nil +} + func (c *cmd) Synopsis() string { return synopsis } diff --git a/command/connect/envoy/envoy_test.go b/command/connect/envoy/envoy_test.go index 31c078dc9..74d11f2ca 100644 --- a/command/connect/envoy/envoy_test.go +++ b/command/connect/envoy/envoy_test.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/stretchr/testify/assert" @@ -116,8 +117,9 @@ type generateConfigTestCase struct { Files map[string]string ProxyConfig map[string]interface{} NamespacesEnabled bool - XDSPorts agent.GRPCPorts // only used for testing custom-configured grpc port - AgentSelf110 bool // fake the agent API from versions v1.10 and earlier + XDSPorts agent.GRPCPorts // used to mock an agent's configured gRPC ports. Plaintext defaults to 8502 and TLS defaults to 8503. + AgentSelf110 bool // fake the agent API from versions v1.10 and earlier + DialFunc func(network, address string) (net.Conn, error) // defaults to a no-op function. Overwrite to test error handling. WantArgs BootstrapTplArgs WantErr string } @@ -139,6 +141,23 @@ func TestGenerateConfig(t *testing.T) { Flags: []string{"-node-name", "test-node"}, WantErr: "'-node-name' requires '-proxy-id'", }, + { + Name: "gRPC disabled", + Flags: []string{"-proxy-id", "test-proxy"}, + XDSPorts: agent.GRPCPorts{ + Plaintext: -1, + TLS: -1, + }, + WantErr: "agent has grpc disabled", + }, + { + Name: "connection not available", + Flags: []string{"-proxy-id", "test-proxy"}, + DialFunc: func(network, address string) (net.Conn, error) { + return net.DialTimeout(network, address, time.Second) + }, + WantErr: "connection refused", + }, { Name: "defaults", Flags: []string{"-proxy-id", "test-proxy"}, @@ -544,22 +563,8 @@ func TestGenerateConfig(t *testing.T) { }, }, { - Name: "missing-ca-file", - Flags: []string{"-proxy-id", "test-proxy", "-ca-file", "some/path"}, - WantArgs: BootstrapTplArgs{ - ProxyCluster: "test-proxy", - ProxyID: "test-proxy", - // We don't know this til after the lookup so it will be empty in the - // initial args call we are testing here. - ProxySourceService: "", - // Should resolve IP, note this might not resolve the same way - // everywhere which might make this test brittle but not sure what else - // to do. - GRPC: GRPC{ - AgentAddress: "127.0.0.1", - AgentPort: "8502", - }, - }, + Name: "missing-ca-file", + Flags: []string{"-proxy-id", "test-proxy", "-ca-file", "some/path"}, WantErr: "Error loading CA File: open some/path: no such file or directory", }, { @@ -590,22 +595,8 @@ func TestGenerateConfig(t *testing.T) { }, }, { - Name: "missing-ca-path", - Flags: []string{"-proxy-id", "test-proxy", "-ca-path", "some/path"}, - WantArgs: BootstrapTplArgs{ - ProxyCluster: "test-proxy", - ProxyID: "test-proxy", - // We don't know this til after the lookup so it will be empty in the - // initial args call we are testing here. - ProxySourceService: "", - // Should resolve IP, note this might not resolve the same way - // everywhere which might make this test brittle but not sure what else - // to do. - GRPC: GRPC{ - AgentAddress: "127.0.0.1", - AgentPort: "8502", - }, - }, + Name: "missing-ca-path", + Flags: []string{"-proxy-id", "test-proxy", "-ca-path", "some/path"}, WantErr: "lstat some/path: no such file or directory", }, { @@ -1084,6 +1075,11 @@ func TestGenerateConfig(t *testing.T) { } } + // Default the ports + if tc.XDSPorts.TLS == 0 && tc.XDSPorts.Plaintext == 0 { + tc.XDSPorts.Plaintext = 8502 + } + // Run a mock agent API that just always returns the proxy config in the // test. var srv *httptest.Server @@ -1106,6 +1102,14 @@ func TestGenerateConfig(t *testing.T) { // explicitly set the client to one which can connect to the httptest.Server c.client = client + if tc.DialFunc != nil { + c.dialFunc = tc.DialFunc + } else { + c.dialFunc = func(_, _ string) (net.Conn, error) { + return nil, nil + } + } + // Run the command myFlags := copyAndReplaceAll(tc.Flags, "@@TEMPDIR@@", testDirPrefix) args := append([]string{"-bootstrap"}, myFlags...)