xds: ensure that all connect timeout configs can apply equally to tproxy direct dial connections (#12711)

Just like standard upstreams the order of applicability in descending precedence:

1. caller's `service-defaults` upstream override for destination
2. caller's `service-defaults` upstream defaults
3. destination's `service-resolver` ConnectTimeout
4. system default of 5s

Co-authored-by: mrspanishviking <kcardenas@hashicorp.com>
This commit is contained in:
R.B. Boyer 2022-04-07 16:58:21 -05:00 committed by GitHub
parent 9e06543a4f
commit f4eac06b21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 198 additions and 45 deletions

3
.changelog/12711.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
xds: ensure that all connect timeout configs can apply equally to tproxy direct dial connections
```

View File

@ -59,6 +59,12 @@ func TestDiscoveryChainEndpoint_Get(t *testing.T) {
t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter)
t.SNI = connect.TargetSNI(t, connect.TestClusterID+".consul")
t.Name = t.SNI
t.ConnectTimeout = 5 * time.Second // default
return t
}
targetWithConnectTimeout := func(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget {
t.ConnectTimeout = connectTimeout
return t
}
@ -237,7 +243,10 @@ func TestDiscoveryChainEndpoint_Get(t *testing.T) {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"web.default.default.dc1": newTarget("web", "", "default", "default", "dc1"),
"web.default.default.dc1": targetWithConnectTimeout(
newTarget("web", "", "default", "default", "dc1"),
33*time.Second,
),
},
},
}

View File

@ -928,6 +928,9 @@ RESOLVE_AGAIN:
}
}
// Expose a copy of this on the targets for ease of access.
target.ConnectTimeout = connectTimeout
// Build node.
node := &structs.DiscoveryGraphNode{
Type: structs.DiscoveryGraphNodeTypeResolver,

View File

@ -293,7 +293,10 @@ func testcase_RouterWithDefaults_NoSplit_WithResolver() compileTestCase {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil),
"main.default.default.dc1": targetWithConnectTimeout(
newTarget("main", "", "default", "default", "dc1", nil),
33*time.Second,
),
},
}
@ -494,7 +497,10 @@ func testcase_RouterWithDefaults_WithNoopSplit_WithResolver() compileTestCase {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil),
"main.default.default.dc1": targetWithConnectTimeout(
newTarget("main", "", "default", "default", "dc1", nil),
33*time.Second,
),
},
}
@ -687,7 +693,10 @@ func testcase_NoopSplit_WithResolver() compileTestCase {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"main.default.default.dc1": newTarget("main", "", "default", "default", "dc1", nil),
"main.default.default.dc1": targetWithConnectTimeout(
newTarget("main", "", "default", "default", "dc1", nil),
33*time.Second,
),
},
}
@ -1847,8 +1856,14 @@ func testcase_MultiDatacenterCanary() compileTestCase {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"main.default.default.dc2": newTarget("main", "", "default", "default", "dc2", nil),
"main.default.default.dc3": newTarget("main", "", "default", "default", "dc3", nil),
"main.default.default.dc2": targetWithConnectTimeout(
newTarget("main", "", "default", "default", "dc2", nil),
33*time.Second,
),
"main.default.default.dc3": targetWithConnectTimeout(
newTarget("main", "", "default", "default", "dc3", nil),
33*time.Second,
),
},
}
return compileTestCase{entries: entries, expect: expect}
@ -2780,8 +2795,14 @@ func newTarget(service, serviceSubset, namespace, partition, datacenter string,
t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter)
t.SNI = connect.TargetSNI(t, "trustdomain.consul")
t.Name = t.SNI
t.ConnectTimeout = 5 * time.Second // default
if modFn != nil {
modFn(t)
}
return t
}
func targetWithConnectTimeout(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget {
t.ConnectTimeout = connectTimeout
return t
}

View File

@ -31,6 +31,12 @@ func TestDiscoveryChainRead(t *testing.T) {
t := structs.NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter)
t.SNI = connect.TargetSNI(t, connect.TestClusterID+".consul")
t.Name = t.SNI
t.ConnectTimeout = 5 * time.Second // default
return t
}
targetWithConnectTimeout := func(t *structs.DiscoveryTarget, connectTimeout time.Duration) *structs.DiscoveryTarget {
t.ConnectTimeout = connectTimeout
return t
}
@ -258,8 +264,14 @@ func TestDiscoveryChainRead(t *testing.T) {
},
},
Targets: map[string]*structs.DiscoveryTarget{
"web.default.default.dc1": newTarget("web", "", "default", "default", "dc1"),
"web.default.default.dc2": newTarget("web", "", "default", "default", "dc2"),
"web.default.default.dc1": targetWithConnectTimeout(
newTarget("web", "", "default", "default", "dc1"),
33*time.Second,
),
"web.default.default.dc2": targetWithConnectTimeout(
newTarget("web", "", "default", "default", "dc2"),
33*time.Second,
),
},
}
if !reflect.DeepEqual(expect, value.Chain) {
@ -268,12 +280,18 @@ func TestDiscoveryChainRead(t *testing.T) {
})
}))
expectTarget_DC1 := newTarget("web", "", "default", "default", "dc1")
expectTarget_DC1 := targetWithConnectTimeout(
newTarget("web", "", "default", "default", "dc1"),
33*time.Second,
)
expectTarget_DC1.MeshGateway = structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeLocal,
}
expectTarget_DC2 := newTarget("web", "", "default", "default", "dc2")
expectTarget_DC2 := targetWithConnectTimeout(
newTarget("web", "", "default", "default", "dc2"),
33*time.Second,
)
expectTarget_DC2.MeshGateway = structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeLocal,
}

View File

@ -116,12 +116,12 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e
continue
}
snap.ConnectProxy.UpstreamConfig[uid] = &u
// This can be true if the upstream is a synthetic entry populated from centralized upstream config.
// Watches should not be created for them.
if u.CentrallyConfigured {
continue
}
snap.ConnectProxy.UpstreamConfig[uid] = &u
dc := s.source.Datacenter
if u.Datacenter != "" {

View File

@ -1,6 +1,8 @@
package proxycfg
import (
"time"
"github.com/mitchellh/go-testing-interface"
"github.com/hashicorp/consul/agent/cache"
@ -322,7 +324,11 @@ func TestConfigSnapshotTransparentProxyDialDirectly(t testing.T) *ConfigSnapshot
mongo = structs.NewServiceName("mongo", nil)
mongoUID = NewUpstreamIDFromServiceName(mongo)
mongoChain = discoverychain.TestCompileConfigEntries(t, "mongo", "default", "default", "dc1", connect.TestClusterID+".consul", nil)
mongoChain = discoverychain.TestCompileConfigEntries(t, "mongo", "default", "default", "dc1", connect.TestClusterID+".consul", nil, &structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "mongo",
ConnectTimeout: 33 * time.Second,
})
db = structs.NewServiceName("db", nil)
)

View File

@ -208,6 +208,8 @@ type DiscoveryTarget struct {
MeshGateway MeshGatewayConfig `json:",omitempty"`
Subset ServiceResolverSubset `json:",omitempty"`
ConnectTimeout time.Duration `json:",omitempty"`
// External is true if this target is outside of this consul cluster.
External bool `json:",omitempty"`
@ -221,6 +223,42 @@ type DiscoveryTarget struct {
Name string `json:",omitempty"`
}
func (t *DiscoveryTarget) MarshalJSON() ([]byte, error) {
type Alias DiscoveryTarget
exported := struct {
ConnectTimeout string `json:",omitempty"`
*Alias
}{
ConnectTimeout: t.ConnectTimeout.String(),
Alias: (*Alias)(t),
}
if t.ConnectTimeout == 0 {
exported.ConnectTimeout = ""
}
return json.Marshal(exported)
}
func (t *DiscoveryTarget) UnmarshalJSON(data []byte) error {
type Alias DiscoveryTarget
aux := &struct {
ConnectTimeout string
*Alias
}{
Alias: (*Alias)(t),
}
if err := lib.UnmarshalJSON(data, &aux); err != nil {
return err
}
var err error
if aux.ConnectTimeout != "" {
if t.ConnectTimeout, err = time.ParseDuration(aux.ConnectTimeout); err != nil {
return err
}
}
return nil
}
func NewDiscoveryTarget(service, serviceSubset, namespace, partition, datacenter string) *DiscoveryTarget {
t := &DiscoveryTarget{
Service: service,

View File

@ -173,9 +173,15 @@ func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message,
})
}
for _, target := range cfgSnap.ConnectProxy.PassthroughUpstreams {
for tid := range target {
uid := proxycfg.NewUpstreamIDFromTargetID(tid)
for uid, chain := range cfgSnap.ConnectProxy.DiscoveryChain {
targetMap, ok := cfgSnap.ConnectProxy.PassthroughUpstreams[uid]
if !ok {
continue
}
for targetID := range targetMap {
uid := proxycfg.NewUpstreamIDFromTargetID(targetID)
sni := connect.ServiceSNI(
uid.Name, "", uid.NamespaceOrDefault(), uid.PartitionOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain)
@ -190,10 +196,13 @@ func makePassthroughClusters(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message,
},
LbPolicy: envoy_cluster_v3.Cluster_CLUSTER_PROVIDED,
// TODO(tproxy) This should use the connection timeout configured on the upstream's config entry
ConnectTimeout: ptypes.DurationProto(5 * time.Second),
}
if discoTarget, ok := chain.Targets[targetID]; ok && discoTarget.ConnectTimeout > 0 {
c.ConnectTimeout = ptypes.DurationProto(discoTarget.ConnectTimeout)
}
spiffeID := connect.SpiffeIDService{
Host: cfgSnap.Roots.TrustDomain,
Partition: uid.PartitionOrDefault(),

View File

@ -210,7 +210,7 @@
"resourceApiVersion": "V3"
}
},
"connectTimeout": "5s",
"connectTimeout": "33s",
"circuitBreakers": {
},
@ -305,7 +305,7 @@
"@type": "type.googleapis.com/envoy.config.cluster.v3.Cluster",
"name": "passthrough~mongo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
"type": "ORIGINAL_DST",
"connectTimeout": "5s",
"connectTimeout": "33s",
"lbPolicy": "CLUSTER_PROVIDED",
"transportSocket": {
"name": "tls",

View File

@ -236,7 +236,44 @@ type DiscoveryTarget struct {
MeshGateway MeshGatewayConfig
Subset ServiceResolverSubset
ConnectTimeout time.Duration
External bool
SNI string
Name string
}
func (t *DiscoveryTarget) MarshalJSON() ([]byte, error) {
type Alias DiscoveryTarget
exported := &struct {
ConnectTimeout string `json:",omitempty"`
*Alias
}{
ConnectTimeout: t.ConnectTimeout.String(),
Alias: (*Alias)(t),
}
if t.ConnectTimeout == 0 {
exported.ConnectTimeout = ""
}
return json.Marshal(exported)
}
func (t *DiscoveryTarget) UnmarshalJSON(data []byte) error {
type Alias DiscoveryTarget
aux := &struct {
ConnectTimeout string
*Alias
}{
Alias: (*Alias)(t),
}
if err := json.Unmarshal(data, &aux); err != nil {
return err
}
var err error
if aux.ConnectTimeout != "" {
if t.ConnectTimeout, err = time.ParseDuration(aux.ConnectTimeout); err != nil {
return err
}
}
return nil
}

View File

@ -51,6 +51,7 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) {
Service: "web",
Namespace: "default",
Datacenter: "dc1",
ConnectTimeout: 5 * time.Second,
SNI: "web.default.dc1.internal." + testClusterID + ".consul",
Name: "web.default.dc1.internal." + testClusterID + ".consul",
},
@ -92,6 +93,7 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) {
Service: "web",
Namespace: "default",
Datacenter: "dc2",
ConnectTimeout: 5 * time.Second,
SNI: "web.default.dc2.internal." + testClusterID + ".consul",
Name: "web.default.dc2.internal." + testClusterID + ".consul",
},
@ -138,6 +140,7 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) {
Service: "web",
Namespace: "default",
Datacenter: "dc1",
ConnectTimeout: 33 * time.Second,
SNI: "web.default.dc1.internal." + testClusterID + ".consul",
Name: "web.default.dc1.internal." + testClusterID + ".consul",
},
@ -186,6 +189,7 @@ func TestAPI_DiscoveryChain_Get(t *testing.T) {
MeshGateway: MeshGatewayConfig{
Mode: MeshGatewayModeLocal,
},
ConnectTimeout: 22 * time.Second,
SNI: "web.default.dc2.internal." + testClusterID + ".consul",
Name: "web.default.dc2.internal." + testClusterID + ".consul",
},

View File

@ -237,6 +237,10 @@ A single node in the compiled discovery chain.
- `External` `(bool: false)` - True if this target is outside of this consul cluster.
- `ConnectTimeout` `(duration)` - Copy of the underlying `service-resolver`
[`ConnectTimeout`](/docs/connect/config-entries/service-resolver#connecttimeout)
field. If one is not defined the default of `5s` is returned.
- `SNI` `(string)` - This value should be used as the
[SNI](https://en.wikipedia.org/wiki/Server_Name_Indication) value when
connecting to this set of endpoints over TLS.

View File

@ -169,8 +169,9 @@ transparent proxy's datacenter. Services can also dial explicit upstreams in oth
in the datacenter `dc2`.
* In the deployment configuration where a [single Consul datacenter spans multiple Kubernetes clusters](/docs/k8s/installation/deployment-configurations/single-dc-multi-k8s), services in one Kubernetes cluster must explicitly dial a service in another Kubernetes cluster using the [consul.hashicorp.com/connect-service-upstreams](/docs/k8s/annotations-and-labels#consul-hashicorp-com-connect-service-upstreams) annotation. An example would be
`"consul.hashicorp.com/connect-service-upstreams": "my-service:1234"`, where `my-service` is the service that exists in another Kubernetes cluster and is exposed on port `1234`. Although Transparent Proxy is enabled, KubeDNS is not utilized when communicating between services existing on separate Kubernetes clusters.
* When dialing headless services the request will be proxied using a plain TCP proxy with a 5s connection timeout.
Currently the upstream's protocol and connection timeout are not considered.
* When dialing headless services, the request will be proxied using a plain TCP
proxy. The upstream's protocol is not considered.
## Using Transparent Proxy