From f4eac06b21b176bac168f7d75ee97761326da976 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" <4903+rboyer@users.noreply.github.com> Date: Thu, 7 Apr 2022 16:58:21 -0500 Subject: [PATCH] xds: ensure that all connect timeout configs can apply equally to tproxy direct dial connections (#12711) Just like standard upstreams the order of applicability in descending precedence: 1. caller's `service-defaults` upstream override for destination 2. caller's `service-defaults` upstream defaults 3. destination's `service-resolver` ConnectTimeout 4. system default of 5s Co-authored-by: mrspanishviking --- .changelog/12711.txt | 3 ++ agent/consul/discovery_chain_endpoint_test.go | 11 ++++- agent/consul/discoverychain/compile.go | 3 ++ agent/consul/discoverychain/compile_test.go | 31 ++++++++++-- agent/discovery_chain_endpoint_test.go | 26 ++++++++-- agent/proxycfg/connect_proxy.go | 2 +- agent/proxycfg/testing_tproxy.go | 8 +++- agent/structs/discovery_chain.go | 38 +++++++++++++++ agent/xds/clusters.go | 17 +++++-- ...ial-instances-directly.envoy-1-20-x.golden | 4 +- api/discovery_chain.go | 47 +++++++++++++++++-- api/discovery_chain_test.go | 44 +++++++++-------- .../connect/l7-traffic/discovery-chain.mdx | 4 ++ .../docs/connect/transparent-proxy.mdx | 5 +- 14 files changed, 198 insertions(+), 45 deletions(-) create mode 100644 .changelog/12711.txt diff --git a/.changelog/12711.txt b/.changelog/12711.txt new file mode 100644 index 000000000..3d1400550 --- /dev/null +++ b/.changelog/12711.txt @@ -0,0 +1,3 @@ +```release-note:improvement +xds: ensure that all connect timeout configs can apply equally to tproxy direct dial connections +``` diff --git a/agent/consul/discovery_chain_endpoint_test.go b/agent/consul/discovery_chain_endpoint_test.go index 1f9a82f14..97ae5b124 100644 --- a/agent/consul/discovery_chain_endpoint_test.go +++ b/agent/consul/discovery_chain_endpoint_test.go @@ -59,6 +59,12 @@ func TestDiscoveryChainEndpoint_Get(t *testing.T) { t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter) t.SNI = connect.TargetSNI(t, connect.TestClusterID+".consul") t.Name = t.SNI + t.ConnectTimeout = 5 * time.Second // default + return t + } + + targetWithConnectTimeout := func(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget { + t.ConnectTimeout = connectTimeout return t } @@ -237,7 +243,10 @@ func TestDiscoveryChainEndpoint_Get(t *testing.T) { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "web.default.default.dc1": newTarget("web", "", "default", "default", "dc1"), + "web.default.default.dc1": targetWithConnectTimeout( + newTarget("web", "", "default", "default", "dc1"), + 33*time.Second, + ), }, }, } diff --git a/agent/consul/discoverychain/compile.go b/agent/consul/discoverychain/compile.go index 0567b8b90..ed664878b 100644 --- a/agent/consul/discoverychain/compile.go +++ b/agent/consul/discoverychain/compile.go @@ -928,6 +928,9 @@ RESOLVE_AGAIN: } } + // Expose a copy of this on the targets for ease of access. + target.ConnectTimeout = connectTimeout + // Build node. node := &structs.DiscoveryGraphNode{ Type: structs.DiscoveryGraphNodeTypeResolver, diff --git a/agent/consul/discoverychain/compile_test.go b/agent/consul/discoverychain/compile_test.go index 9a3dde647..221ac757f 100644 --- a/agent/consul/discoverychain/compile_test.go +++ b/agent/consul/discoverychain/compile_test.go @@ -293,7 +293,10 @@ func testcase_RouterWithDefaults_NoSplit_WithResolver() compileTestCase { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil), + "main.default.default.dc1": targetWithConnectTimeout( + newTarget("main", "", "default", "default", "dc1", nil), + 33*time.Second, + ), }, } @@ -494,7 +497,10 @@ func testcase_RouterWithDefaults_WithNoopSplit_WithResolver() compileTestCase { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil), + "main.default.default.dc1": targetWithConnectTimeout( + newTarget("main", "", "default", "default", "dc1", nil), + 33*time.Second, + ), }, } @@ -687,7 +693,10 @@ func testcase_NoopSplit_WithResolver() compileTestCase { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil), + "main.default.default.dc1": targetWithConnectTimeout( + newTarget("main", "", "default", "default", "dc1", nil), + 33*time.Second, + ), }, } @@ -1847,8 +1856,14 @@ func testcase_MultiDatacenterCanary() compileTestCase { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "main.default.default.dc2": newTarget("main", "", "default", "default", "dc2", nil), - "main.default.default.dc3": newTarget("main", "", "default", "default", "dc3", nil), + "main.default.default.dc2": targetWithConnectTimeout( + newTarget("main", "", "default", "default", "dc2", nil), + 33*time.Second, + ), + "main.default.default.dc3": targetWithConnectTimeout( + newTarget("main", "", "default", "default", "dc3", nil), + 33*time.Second, + ), }, } return compileTestCase{entries: entries, expect: expect} @@ -2780,8 +2795,14 @@ func newTarget(service, serviceSubset, namespace, partition, datacenter string, t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter) t.SNI = connect.TargetSNI(t, "trustdomain.consul") t.Name = t.SNI + t.ConnectTimeout = 5 * time.Second // default if modFn != nil { modFn(t) } return t } + +func targetWithConnectTimeout(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget { + t.ConnectTimeout = connectTimeout + return t +} diff --git a/agent/discovery_chain_endpoint_test.go b/agent/discovery_chain_endpoint_test.go index 3db87ba52..86ef96617 100644 --- a/agent/discovery_chain_endpoint_test.go +++ b/agent/discovery_chain_endpoint_test.go @@ -31,6 +31,12 @@ func TestDiscoveryChainRead(t *testing.T) { t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter) t.SNI = connect.TargetSNI(t, connect.TestClusterID+".consul") t.Name = t.SNI + t.ConnectTimeout = 5 * time.Second // default + return t + } + + targetWithConnectTimeout := func(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget { + t.ConnectTimeout = connectTimeout return t } @@ -258,8 +264,14 @@ func TestDiscoveryChainRead(t *testing.T) { }, }, Targets: map[string]*structs.DiscoveryTarget{ - "web.default.default.dc1": newTarget("web", "", "default", "default", "dc1"), - "web.default.default.dc2": newTarget("web", "", "default", "default", "dc2"), + "web.default.default.dc1": targetWithConnectTimeout( + newTarget("web", "", "default", "default", "dc1"), + 33*time.Second, + ), + "web.default.default.dc2": targetWithConnectTimeout( + newTarget("web", "", "default", "default", "dc2"), + 33*time.Second, + ), }, } if !reflect.DeepEqual(expect, value.Chain) { @@ -268,12 +280,18 @@ func TestDiscoveryChainRead(t *testing.T) { }) })) - expectTarget_DC1 := newTarget("web", "", "default", "default", "dc1") + expectTarget_DC1 := targetWithConnectTimeout( + newTarget("web", "", "default", "default", "dc1"), + 33*time.Second, + ) expectTarget_DC1.MeshGateway = structs.MeshGatewayConfig{ Mode: structs.MeshGatewayModeLocal, } - expectTarget_DC2 := newTarget("web", "", "default", "default", "dc2") + expectTarget_DC2 := targetWithConnectTimeout( + newTarget("web", "", "default", "default", "dc2"), + 33*time.Second, + ) expectTarget_DC2.MeshGateway = structs.MeshGatewayConfig{ Mode: structs.MeshGatewayModeLocal, } diff --git a/agent/proxycfg/connect_proxy.go b/agent/proxycfg/connect_proxy.go index d0849a01e..e23ae0662 100644 --- a/agent/proxycfg/connect_proxy.go +++ b/agent/proxycfg/connect_proxy.go @@ -116,12 +116,12 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e continue } + snap.ConnectProxy.UpstreamConfig[uid] = &u // This can be true if the upstream is a synthetic entry populated from centralized upstream config. // Watches should not be created for them. if u.CentrallyConfigured { continue } - snap.ConnectProxy.UpstreamConfig[uid] = &u dc := s.source.Datacenter if u.Datacenter != "" { diff --git a/agent/proxycfg/testing_tproxy.go b/agent/proxycfg/testing_tproxy.go index 4c04c9346..e593a51e9 100644 --- a/agent/proxycfg/testing_tproxy.go +++ b/agent/proxycfg/testing_tproxy.go @@ -1,6 +1,8 @@ package proxycfg import ( + "time" + "github.com/mitchellh/go-testing-interface" "github.com/hashicorp/consul/agent/cache" @@ -322,7 +324,11 @@ func TestConfigSnapshotTransparentProxyDialDirectly(t testing.T) *ConfigSnapshot mongo = structs.NewServiceName("mongo", nil) mongoUID = NewUpstreamIDFromServiceName(mongo) - mongoChain = discoverychain.TestCompileConfigEntries(t, "mongo", "default", "default", "dc1", connect.TestClusterID+".consul", nil) + mongoChain = discoverychain.TestCompileConfigEntries(t, "mongo", "default", "default", "dc1", connect.TestClusterID+".consul", nil, &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "mongo", + ConnectTimeout: 33 * time.Second, + }) db = structs.NewServiceName("db", nil) ) diff --git a/agent/structs/discovery_chain.go b/agent/structs/discovery_chain.go index c2738f842..046ec1c4d 100644 --- a/agent/structs/discovery_chain.go +++ b/agent/structs/discovery_chain.go @@ -208,6 +208,8 @@ type DiscoveryTarget struct { MeshGateway MeshGatewayConfig `json:",omitempty"` Subset ServiceResolverSubset `json:",omitempty"` + ConnectTimeout time.Duration `json:",omitempty"` + // External is true if this target is outside of this consul cluster. External bool `json:",omitempty"` @@ -221,6 +223,42 @@ type DiscoveryTarget struct { Name string `json:",omitempty"` } +func (t *DiscoveryTarget) MarshalJSON() ([]byte, error) { + type Alias DiscoveryTarget + exported := struct { + ConnectTimeout string `json:",omitempty"` + *Alias + }{ + ConnectTimeout: t.ConnectTimeout.String(), + Alias: (*Alias)(t), + } + if t.ConnectTimeout == 0 { + exported.ConnectTimeout = "" + } + + return json.Marshal(exported) +} + +func (t *DiscoveryTarget) UnmarshalJSON(data []byte) error { + type Alias DiscoveryTarget + aux := &struct { + ConnectTimeout string + *Alias + }{ + Alias: (*Alias)(t), + } + if err := lib.UnmarshalJSON(data, &aux); err != nil { + return err + } + var err error + if aux.ConnectTimeout != "" { + if t.ConnectTimeout, err = time.ParseDuration(aux.ConnectTimeout); err != nil { + return err + } + } + return nil +} + func NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter string) *DiscoveryTarget { t := &DiscoveryTarget{ Service: service, diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index 63442eb51..bbbad4a8b 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -173,9 +173,15 @@ func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, }) } - for _, target := range cfgSnap.ConnectProxy.PassthroughUpstreams { - for tid := range target { - uid := proxycfg.NewUpstreamIDFromTargetID(tid) + for uid, chain := range cfgSnap.ConnectProxy.DiscoveryChain { + targetMap, ok := cfgSnap.ConnectProxy.PassthroughUpstreams[uid] + if !ok { + continue + } + + for targetID := range targetMap { + + uid := proxycfg.NewUpstreamIDFromTargetID(targetID) sni := connect.ServiceSNI( uid.Name, "", uid.NamespaceOrDefault(), uid.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) @@ -190,10 +196,13 @@ func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, }, LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED, - // TODO(tproxy) This should use the connection timeout configured on the upstream's config entry ConnectTimeout: ptypes.DurationProto(5 * time.Second), } + if discoTarget, ok := chain.Targets[targetID]; ok && discoTarget.ConnectTimeout > 0 { + c.ConnectTimeout = ptypes.DurationProto(discoTarget.ConnectTimeout) + } + spiffeID := connect.SpiffeIDService{ Host: cfgSnap.Roots.TrustDomain, Partition: uid.PartitionOrDefault(), diff --git a/agent/xds/testdata/clusters/transparent-proxy-dial-instances-directly.envoy-1-20-x.golden b/agent/xds/testdata/clusters/transparent-proxy-dial-instances-directly.envoy-1-20-x.golden index d920c11a4..ff729a66d 100644 --- a/agent/xds/testdata/clusters/transparent-proxy-dial-instances-directly.envoy-1-20-x.golden +++ b/agent/xds/testdata/clusters/transparent-proxy-dial-instances-directly.envoy-1-20-x.golden @@ -210,7 +210,7 @@ "resourceApiVersion": "V3" } }, - "connectTimeout": "5s", + "connectTimeout": "33s", "circuitBreakers": { }, @@ -305,7 +305,7 @@ "@type": "type.googleapis.com/envoy.config.cluster.v3.Cluster", "name": "passthrough~mongo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", "type": "ORIGINAL_DST", - "connectTimeout": "5s", + "connectTimeout": "33s", "lbPolicy": "CLUSTER_PROVIDED", "transportSocket": { "name": "tls", diff --git a/api/discovery_chain.go b/api/discovery_chain.go index d198b2bb5..4217603cf 100644 --- a/api/discovery_chain.go +++ b/api/discovery_chain.go @@ -234,9 +234,46 @@ type DiscoveryTarget struct { Namespace string Datacenter string - MeshGateway MeshGatewayConfig - Subset ServiceResolverSubset - External bool - SNI string - Name string + MeshGateway MeshGatewayConfig + Subset ServiceResolverSubset + ConnectTimeout time.Duration + External bool + SNI string + Name string +} + +func (t *DiscoveryTarget) MarshalJSON() ([]byte, error) { + type Alias DiscoveryTarget + exported := &struct { + ConnectTimeout string `json:",omitempty"` + *Alias + }{ + ConnectTimeout: t.ConnectTimeout.String(), + Alias: (*Alias)(t), + } + if t.ConnectTimeout == 0 { + exported.ConnectTimeout = "" + } + + return json.Marshal(exported) +} + +func (t *DiscoveryTarget) UnmarshalJSON(data []byte) error { + type Alias DiscoveryTarget + aux := &struct { + ConnectTimeout string + *Alias + }{ + Alias: (*Alias)(t), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + var err error + if aux.ConnectTimeout != "" { + if t.ConnectTimeout, err = time.ParseDuration(aux.ConnectTimeout); err != nil { + return err + } + } + return nil } diff --git a/api/discovery_chain_test.go b/api/discovery_chain_test.go index 049ce3963..dd4fc0181 100644 --- a/api/discovery_chain_test.go +++ b/api/discovery_chain_test.go @@ -47,12 +47,13 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) { }, Targets: map[string]*DiscoveryTarget{ "web.default.default.dc1": { - ID: "web.default.default.dc1", - Service: "web", - Namespace: "default", - Datacenter: "dc1", - SNI: "web.default.dc1.internal." + testClusterID + ".consul", - Name: "web.default.dc1.internal." + testClusterID + ".consul", + ID: "web.default.default.dc1", + Service: "web", + Namespace: "default", + Datacenter: "dc1", + ConnectTimeout: 5 * time.Second, + SNI: "web.default.dc1.internal." + testClusterID + ".consul", + Name: "web.default.dc1.internal." + testClusterID + ".consul", }, }, }, @@ -88,12 +89,13 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) { }, Targets: map[string]*DiscoveryTarget{ "web.default.default.dc2": { - ID: "web.default.default.dc2", - Service: "web", - Namespace: "default", - Datacenter: "dc2", - SNI: "web.default.dc2.internal." + testClusterID + ".consul", - Name: "web.default.dc2.internal." + testClusterID + ".consul", + ID: "web.default.default.dc2", + Service: "web", + Namespace: "default", + Datacenter: "dc2", + ConnectTimeout: 5 * time.Second, + SNI: "web.default.dc2.internal." + testClusterID + ".consul", + Name: "web.default.dc2.internal." + testClusterID + ".consul", }, }, }, @@ -134,12 +136,13 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) { }, Targets: map[string]*DiscoveryTarget{ "web.default.default.dc1": { - ID: "web.default.default.dc1", - Service: "web", - Namespace: "default", - Datacenter: "dc1", - SNI: "web.default.dc1.internal." + testClusterID + ".consul", - Name: "web.default.dc1.internal." + testClusterID + ".consul", + ID: "web.default.default.dc1", + Service: "web", + Namespace: "default", + Datacenter: "dc1", + ConnectTimeout: 33 * time.Second, + SNI: "web.default.dc1.internal." + testClusterID + ".consul", + Name: "web.default.dc1.internal." + testClusterID + ".consul", }, }, }, @@ -186,8 +189,9 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) { MeshGateway: MeshGatewayConfig{ Mode: MeshGatewayModeLocal, }, - SNI: "web.default.dc2.internal." + testClusterID + ".consul", - Name: "web.default.dc2.internal." + testClusterID + ".consul", + ConnectTimeout: 22 * time.Second, + SNI: "web.default.dc2.internal." + testClusterID + ".consul", + Name: "web.default.dc2.internal." + testClusterID + ".consul", }, }, }, diff --git a/website/content/docs/connect/l7-traffic/discovery-chain.mdx b/website/content/docs/connect/l7-traffic/discovery-chain.mdx index dc9594e61..39ea48df5 100644 --- a/website/content/docs/connect/l7-traffic/discovery-chain.mdx +++ b/website/content/docs/connect/l7-traffic/discovery-chain.mdx @@ -237,6 +237,10 @@ A single node in the compiled discovery chain. - `External` `(bool: false)` - True if this target is outside of this consul cluster. +- `ConnectTimeout` `(duration)` - Copy of the underlying `service-resolver` + [`ConnectTimeout`](/docs/connect/config-entries/service-resolver#connecttimeout) + field. If one is not defined the default of `5s` is returned. + - `SNI` `(string)` - This value should be used as the [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication) value when connecting to this set of endpoints over TLS. diff --git a/website/content/docs/connect/transparent-proxy.mdx b/website/content/docs/connect/transparent-proxy.mdx index 750c62e0f..1091091cf 100644 --- a/website/content/docs/connect/transparent-proxy.mdx +++ b/website/content/docs/connect/transparent-proxy.mdx @@ -169,8 +169,9 @@ transparent proxy's datacenter. Services can also dial explicit upstreams in oth in the datacenter `dc2`. * In the deployment configuration where a [single Consul datacenter spans multiple Kubernetes clusters](/docs/k8s/installation/deployment-configurations/single-dc-multi-k8s), services in one Kubernetes cluster must explicitly dial a service in another Kubernetes cluster using the [consul.hashicorp.com/connect-service-upstreams](/docs/k8s/annotations-and-labels#consul-hashicorp-com-connect-service-upstreams) annotation. An example would be `"consul.hashicorp.com/connect-service-upstreams": "my-service:1234"`, where `my-service` is the service that exists in another Kubernetes cluster and is exposed on port `1234`. Although Transparent Proxy is enabled, KubeDNS is not utilized when communicating between services existing on separate Kubernetes clusters. -* When dialing headless services the request will be proxied using a plain TCP proxy with a 5s connection timeout. -Currently the upstream's protocol and connection timeout are not considered. + +* When dialing headless services, the request will be proxied using a plain TCP + proxy. The upstream's protocol is not considered. ## Using Transparent Proxy