diff --git a/.changelog/13722.txt b/.changelog/13722.txt new file mode 100644 index 000000000..2cf90aa37 --- /dev/null +++ b/.changelog/13722.txt @@ -0,0 +1,3 @@ +```release-note:feature +streaming: Added topic that can be used to consume updates about the list of services in a datacenter +``` diff --git a/.changelog/13787.txt b/.changelog/13787.txt new file mode 100644 index 000000000..0682d70c4 --- /dev/null +++ b/.changelog/13787.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: when `acl token read` is used with the `-self` and `-expanded` flags, return an error instead of panicking +``` diff --git a/.changelog/13807.txt b/.changelog/13807.txt new file mode 100644 index 000000000..d1cec75f7 --- /dev/null +++ b/.changelog/13807.txt @@ -0,0 +1,6 @@ +```release-note: improvement +connect: Add Envoy 1.23.0 to support matrix +``` +```release-note: breaking-change +connect: Removes support for Envoy 1.19 +``` diff --git a/.changelog/13847.txt b/.changelog/13847.txt new file mode 100644 index 000000000..2bbe7e241 --- /dev/null +++ b/.changelog/13847.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: Fixed a goroutine/memory leak that would occur when using the ingress gateway. +``` diff --git a/.circleci/config.yml b/.circleci/config.yml index a9c434b46..af1a2f5c6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -24,9 +24,10 @@ references: VAULT_BINARY_VERSION: 1.9.4 GO_VERSION: 1.18.1 envoy-versions: &supported_envoy_versions - - &default_envoy_version "1.19.5" - - "1.20.4" - - "1.21.3" + - &default_envoy_version "1.20.6" + - "1.21.4" + - "1.22.2" + - "1.23.0" images: # When updating the Go version, remember to also update the versions in the # workflows section for go-test-lib jobs. @@ -875,8 +876,13 @@ jobs: environment: ENVOY_VERSION: << parameters.envoy-version >> XDS_TARGET: << parameters.xds-target >> + AWS_LAMBDA_REGION: us-west-2 steps: &ENVOY_INTEGRATION_TEST_STEPS - checkout + - assume-role: + access-key: AWS_ACCESS_KEY_ID_LAMBDA + secret-key: AWS_SECRET_ACCESS_KEY_LAMBDA + role-arn: ROLE_ARN_LAMBDA # Get go binary from workspace - attach_workspace: at: . 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0320a7bb1..fc2506abf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -254,8 +254,8 @@ jobs: docker.io/hashicorppreview/${{ env.repo }}:${{ env.dev_tag }}-${{ github.sha }} smoke_test: .github/scripts/verify_docker.sh v${{ env.version }} - build-docker-redhat: - name: Docker Build UBI Image for RedHat + build-docker-ubi-redhat: + name: Docker Build UBI Image for RedHat Registry needs: - get-product-version - build @@ -274,6 +274,39 @@ jobs: redhat_tag: scan.connect.redhat.com/ospid-60f9fdbec3a80eac643abedf/${{env.repo}}:${{env.version}}-ubi smoke_test: .github/scripts/verify_docker.sh v${{ env.version }} + build-docker-ubi-dockerhub: + name: Docker Build UBI Image for DockerHub + needs: + - get-product-version + - build + runs-on: ubuntu-latest + env: + repo: ${{github.event.repository.name}} + version: ${{needs.get-product-version.outputs.product-version}} + + steps: + - uses: actions/checkout@v2 + + # Strip everything but MAJOR.MINOR from the version string and add a `-dev` suffix + # This naming convention will be used ONLY for per-commit dev images + - name: Set docker dev tag + run: | + version="${{ env.version }}" + echo "dev_tag=${version%.*}-dev" >> $GITHUB_ENV + + - uses: hashicorp/actions-docker-build@v1 + with: + version: ${{env.version}} + target: ubi + arch: amd64 + tags: | + docker.io/hashicorp/${{env.repo}}:${{env.version}}-ubi + public.ecr.aws/hashicorp/${{env.repo}}:${{env.version}}-ubi + dev_tags: | + docker.io/hashicorppreview/${{ env.repo }}:${{ env.dev_tag }}-ubi + docker.io/hashicorppreview/${{ env.repo }}:${{ env.dev_tag }}-ubi-${{ github.sha }} + smoke_test: .github/scripts/verify_docker.sh v${{ env.version }} + verify-linux: needs: - get-product-version diff --git a/.gitignore b/.gitignore index b9630db38..faa096147 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ changelog.tmp exit-code Thumbs.db .idea +.vscode # MacOS .DS_Store diff --git a/.release/ci.hcl b/.release/ci.hcl index e422a9ce2..ceb11f759 100644 --- a/.release/ci.hcl +++ b/.release/ci.hcl @@ -178,6 +178,15 @@ event "promote-dev-docker" { } } +event "fossa-scan" { + depends = ["promote-dev-docker"] + action "fossa-scan" { + organization = "hashicorp" + repository = "crt-workflows-common" + workflow = "fossa-scan" + } +} + ## These are promotion and post-publish events ## they should be added to the end of the file after the verify event stanza. 
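The new `build-docker-ubi-dockerhub` job derives its per-commit dev tag with the shell expansion `${version%.*}-dev`, i.e. it drops the trailing `.PATCH` component and appends `-dev`. The Go snippet below is purely illustrative (it is not part of the workflow, and the version string is a made-up example); it mirrors what that expansion produces:

```go
package main

import (
	"fmt"
	"strings"
)

// devTag mirrors the shell expansion ${version%.*}-dev used in the
// "Set docker dev tag" step: strip everything after the last dot
// (the PATCH component) and append "-dev".
func devTag(version string) string {
	if i := strings.LastIndex(version, "."); i >= 0 {
		version = version[:i]
	}
	return version + "-dev"
}

func main() {
	// Example version, hypothetical: prints "1.13-dev".
	fmt.Println(devTag("1.13.1"))
}
```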
diff --git a/acl/acl_test.go b/acl/acl_test.go index 3ce0fa59b..fae37e5a6 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -27,6 +27,7 @@ func legacyPolicy(policy *Policy) *Policy { Keyring: policy.Keyring, Operator: policy.Operator, Mesh: policy.Mesh, + Peering: policy.Peering, }, } } @@ -117,6 +118,14 @@ func checkAllowMeshWrite(t *testing.T, authz Authorizer, prefix string, entCtx * require.Equal(t, Allow, authz.MeshWrite(entCtx)) } +func checkAllowPeeringRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Allow, authz.PeeringRead(entCtx)) +} + +func checkAllowPeeringWrite(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Allow, authz.PeeringWrite(entCtx)) +} + func checkAllowOperatorRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { require.Equal(t, Allow, authz.OperatorRead(entCtx)) } @@ -241,6 +250,14 @@ func checkDenyMeshWrite(t *testing.T, authz Authorizer, prefix string, entCtx *A require.Equal(t, Deny, authz.MeshWrite(entCtx)) } +func checkDenyPeeringRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Deny, authz.PeeringRead(entCtx)) +} + +func checkDenyPeeringWrite(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Deny, authz.PeeringWrite(entCtx)) +} + func checkDenyOperatorRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { require.Equal(t, Deny, authz.OperatorRead(entCtx)) } @@ -365,6 +382,14 @@ func checkDefaultMeshWrite(t *testing.T, authz Authorizer, prefix string, entCtx require.Equal(t, Default, authz.MeshWrite(entCtx)) } +func checkDefaultPeeringRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Default, authz.PeeringRead(entCtx)) +} + +func checkDefaultPeeringWrite(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { + require.Equal(t, Default, authz.PeeringWrite(entCtx)) +} + func checkDefaultOperatorRead(t *testing.T, authz Authorizer, prefix string, entCtx *AuthorizerContext) { require.Equal(t, Default, authz.OperatorRead(entCtx)) } @@ -446,6 +471,8 @@ func TestACL(t *testing.T) { {name: "DenyNodeWrite", check: checkDenyNodeWrite}, {name: "DenyMeshRead", check: checkDenyMeshRead}, {name: "DenyMeshWrite", check: checkDenyMeshWrite}, + {name: "DenyPeeringRead", check: checkDenyPeeringRead}, + {name: "DenyPeeringWrite", check: checkDenyPeeringWrite}, {name: "DenyOperatorRead", check: checkDenyOperatorRead}, {name: "DenyOperatorWrite", check: checkDenyOperatorWrite}, {name: "DenyPreparedQueryRead", check: checkDenyPreparedQueryRead}, @@ -480,6 +507,8 @@ func TestACL(t *testing.T) { {name: "AllowNodeWrite", check: checkAllowNodeWrite}, {name: "AllowMeshRead", check: checkAllowMeshRead}, {name: "AllowMeshWrite", check: checkAllowMeshWrite}, + {name: "AllowPeeringRead", check: checkAllowPeeringRead}, + {name: "AllowPeeringWrite", check: checkAllowPeeringWrite}, {name: "AllowOperatorRead", check: checkAllowOperatorRead}, {name: "AllowOperatorWrite", check: checkAllowOperatorWrite}, {name: "AllowPreparedQueryRead", check: checkAllowPreparedQueryRead}, @@ -514,6 +543,8 @@ func TestACL(t *testing.T) { {name: "AllowNodeWrite", check: checkAllowNodeWrite}, {name: "AllowMeshRead", check: checkAllowMeshRead}, {name: "AllowMeshWrite", check: checkAllowMeshWrite}, + {name: "AllowPeeringRead", check: checkAllowPeeringRead}, + {name: "AllowPeeringWrite", check: 
checkAllowPeeringWrite}, {name: "AllowOperatorRead", check: checkAllowOperatorRead}, {name: "AllowOperatorWrite", check: checkAllowOperatorWrite}, {name: "AllowPreparedQueryRead", check: checkAllowPreparedQueryRead}, @@ -1217,6 +1248,319 @@ func TestACL(t *testing.T) { {name: "WriteAllowed", check: checkAllowMeshWrite}, }, }, + { + name: "PeeringDefaultAllowPolicyDeny", + defaultPolicy: AllowAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + name: "PeeringDefaultAllowPolicyRead", + defaultPolicy: AllowAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + name: "PeeringDefaultAllowPolicyWrite", + defaultPolicy: AllowAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + name: "PeeringDefaultAllowPolicyNone", + defaultPolicy: AllowAll(), + policyStack: []*Policy{ + {}, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + name: "PeeringDefaultDenyPolicyDeny", + defaultPolicy: DenyAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + name: "PeeringDefaultDenyPolicyRead", + defaultPolicy: DenyAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + name: "PeeringDefaultDenyPolicyWrite", + defaultPolicy: DenyAll(), + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Peering: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + name: "PeeringDefaultDenyPolicyNone", + defaultPolicy: DenyAll(), + policyStack: []*Policy{ + {}, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:deny, p:deny = deny + name: "PeeringOperatorDenyPolicyDeny", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyDeny, + Peering: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:read, p:deny = deny + name: "PeeringOperatorReadPolicyDeny", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyRead, + Peering: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:write, p:deny = deny + name: "PeeringOperatorWritePolicyDeny", + defaultPolicy: nil, // test both + 
policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyWrite, + Peering: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:deny, p:read = read + name: "PeeringOperatorDenyPolicyRead", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyDeny, + Peering: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:read, p:read = read + name: "PeeringOperatorReadPolicyRead", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyRead, + Peering: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:write, p:read = read + name: "PeeringOperatorWritePolicyRead", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyWrite, + Peering: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:deny, p:write = write + name: "PeeringOperatorDenyPolicyWrite", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyDeny, + Peering: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + // o:read, p:write = write + name: "PeeringOperatorReadPolicyWrite", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyRead, + Peering: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + // o:write, p:write = write + name: "PeeringOperatorWritePolicyWrite", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyWrite, + Peering: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, + { + // o:deny, p: = deny + name: "PeeringOperatorDenyPolicyNone", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyDeny, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadDenied", check: checkDenyPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:read, p: = read + name: "PeeringOperatorReadPolicyNone", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyRead, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteDenied", check: checkDenyPeeringWrite}, + }, + }, + { + // o:write, p: = write + name: "PeeringOperatorWritePolicyNone", + defaultPolicy: nil, // test both + policyStack: []*Policy{ + { + PolicyRules: PolicyRules{ + Operator: PolicyWrite, + }, + }, + }, + checks: []aclCheck{ + {name: "ReadAllowed", check: checkAllowPeeringRead}, + {name: "WriteAllowed", check: checkAllowPeeringWrite}, + }, + }, { name: 
"OperatorDefaultAllowPolicyDeny", defaultPolicy: AllowAll(), diff --git a/acl/authorizer.go b/acl/authorizer.go index fe28c05ed..6842283b1 100644 --- a/acl/authorizer.go +++ b/acl/authorizer.go @@ -114,6 +114,14 @@ type Authorizer interface { // functions can be used. MeshWrite(*AuthorizerContext) EnforcementDecision + // PeeringRead determines if the read-only Consul peering functions + // can be used. + PeeringRead(*AuthorizerContext) EnforcementDecision + + // PeeringWrite determines if the stage-changing Consul peering + // functions can be used. + PeeringWrite(*AuthorizerContext) EnforcementDecision + // NodeRead checks for permission to read (discover) a given node. NodeRead(string, *AuthorizerContext) EnforcementDecision @@ -327,6 +335,24 @@ func (a AllowAuthorizer) MeshWriteAllowed(ctx *AuthorizerContext) error { return nil } +// PeeringReadAllowed determines if the read-only Consul peering functions +// can be used. +func (a AllowAuthorizer) PeeringReadAllowed(ctx *AuthorizerContext) error { + if a.Authorizer.PeeringRead(ctx) != Allow { + return PermissionDeniedByACLUnnamed(a, ctx, ResourcePeering, AccessRead) + } + return nil +} + +// PeeringWriteAllowed determines if the state-changing Consul peering +// functions can be used. +func (a AllowAuthorizer) PeeringWriteAllowed(ctx *AuthorizerContext) error { + if a.Authorizer.PeeringWrite(ctx) != Allow { + return PermissionDeniedByACLUnnamed(a, ctx, ResourcePeering, AccessWrite) + } + return nil +} + // NodeReadAllowed checks for permission to read (discover) a given node. func (a AllowAuthorizer) NodeReadAllowed(name string, ctx *AuthorizerContext) error { if a.Authorizer.NodeRead(name, ctx) != Allow { @@ -542,12 +568,11 @@ func Enforce(authz Authorizer, rsc Resource, segment string, access string, ctx return authz.SessionWrite(segment, ctx), nil } case ResourcePeering: - // TODO (peering) switch this over to using PeeringRead & PeeringWrite methods once implemented switch lowerAccess { case "read": - return authz.OperatorRead(ctx), nil + return authz.PeeringRead(ctx), nil case "write": - return authz.OperatorWrite(ctx), nil + return authz.PeeringWrite(ctx), nil } default: if processed, decision, err := enforceEnterprise(authz, rsc, segment, lowerAccess, ctx); processed { @@ -561,6 +586,7 @@ func Enforce(authz Authorizer, rsc Resource, segment string, access string, ctx // NewAuthorizerFromRules is a convenience function to invoke NewPolicyFromSource followed by NewPolicyAuthorizer with // the parse policy. +// TODO(ACL-Legacy-Compat): remove syntax arg after removing SyntaxLegacy func NewAuthorizerFromRules(rules string, syntax SyntaxVersion, conf *Config, meta *EnterprisePolicyMeta) (Authorizer, error) { policy, err := NewPolicyFromSource(rules, syntax, conf, meta) if err != nil { diff --git a/acl/authorizer_test.go b/acl/authorizer_test.go index f8aeda3d4..03c0517a1 100644 --- a/acl/authorizer_test.go +++ b/acl/authorizer_test.go @@ -139,6 +139,20 @@ func (m *mockAuthorizer) MeshWrite(ctx *AuthorizerContext) EnforcementDecision { return ret.Get(0).(EnforcementDecision) } +// PeeringRead determines if the read-only Consul peering functions +// can be used. +func (m *mockAuthorizer) PeeringRead(ctx *AuthorizerContext) EnforcementDecision { + ret := m.Called(ctx) + return ret.Get(0).(EnforcementDecision) +} + +// PeeringWrite determines if the state-changing Consul peering +// functions can be used. 
+func (m *mockAuthorizer) PeeringWrite(ctx *AuthorizerContext) EnforcementDecision { + ret := m.Called(ctx) + return ret.Get(0).(EnforcementDecision) +} + // OperatorRead determines if the read-only Consul operator functions // can be used. ret := m.Called(segment, ctx) func (m *mockAuthorizer) OperatorRead(ctx *AuthorizerContext) EnforcementDecision { @@ -463,29 +477,25 @@ func TestACL_Enforce(t *testing.T) { err: "Invalid access level", }, { - // TODO (peering) Update to use PeeringRead - method: "OperatorRead", + method: "PeeringRead", resource: ResourcePeering, access: "read", ret: Allow, }, { - // TODO (peering) Update to use PeeringRead - method: "OperatorRead", + method: "PeeringRead", resource: ResourcePeering, access: "read", ret: Deny, }, { - // TODO (peering) Update to use PeeringWrite - method: "OperatorWrite", + method: "PeeringWrite", resource: ResourcePeering, access: "write", ret: Allow, }, { - // TODO (peering) Update to use PeeringWrite - method: "OperatorWrite", + method: "PeeringWrite", resource: ResourcePeering, access: "write", ret: Deny, diff --git a/acl/chained_authorizer.go b/acl/chained_authorizer.go index 77df69a3e..cf81cc4b1 100644 --- a/acl/chained_authorizer.go +++ b/acl/chained_authorizer.go @@ -161,6 +161,22 @@ func (c *ChainedAuthorizer) MeshWrite(entCtx *AuthorizerContext) EnforcementDeci }) } +// PeeringRead determines if the read-only Consul peering functions +// can be used. +func (c *ChainedAuthorizer) PeeringRead(entCtx *AuthorizerContext) EnforcementDecision { + return c.executeChain(func(authz Authorizer) EnforcementDecision { + return authz.PeeringRead(entCtx) + }) +} + +// PeeringWrite determines if the state-changing Consul peering +// functions can be used. +func (c *ChainedAuthorizer) PeeringWrite(entCtx *AuthorizerContext) EnforcementDecision { + return c.executeChain(func(authz Authorizer) EnforcementDecision { + return authz.PeeringWrite(entCtx) + }) +} + // NodeRead checks for permission to read (discover) a given node. 
func (c *ChainedAuthorizer) NodeRead(node string, entCtx *AuthorizerContext) EnforcementDecision { return c.executeChain(func(authz Authorizer) EnforcementDecision { diff --git a/acl/chained_authorizer_test.go b/acl/chained_authorizer_test.go index 5f33d0166..284a1bd0e 100644 --- a/acl/chained_authorizer_test.go +++ b/acl/chained_authorizer_test.go @@ -68,6 +68,12 @@ func (authz testAuthorizer) MeshRead(*AuthorizerContext) EnforcementDecision { func (authz testAuthorizer) MeshWrite(*AuthorizerContext) EnforcementDecision { return EnforcementDecision(authz) } +func (authz testAuthorizer) PeeringRead(*AuthorizerContext) EnforcementDecision { + return EnforcementDecision(authz) +} +func (authz testAuthorizer) PeeringWrite(*AuthorizerContext) EnforcementDecision { + return EnforcementDecision(authz) +} func (authz testAuthorizer) OperatorRead(*AuthorizerContext) EnforcementDecision { return EnforcementDecision(authz) } @@ -128,6 +134,8 @@ func TestChainedAuthorizer(t *testing.T) { checkDenyNodeWrite(t, authz, "foo", nil) checkDenyMeshRead(t, authz, "foo", nil) checkDenyMeshWrite(t, authz, "foo", nil) + checkDenyPeeringRead(t, authz, "foo", nil) + checkDenyPeeringWrite(t, authz, "foo", nil) checkDenyOperatorRead(t, authz, "foo", nil) checkDenyOperatorWrite(t, authz, "foo", nil) checkDenyPreparedQueryRead(t, authz, "foo", nil) @@ -160,6 +168,8 @@ func TestChainedAuthorizer(t *testing.T) { checkDenyNodeWrite(t, authz, "foo", nil) checkDenyMeshRead(t, authz, "foo", nil) checkDenyMeshWrite(t, authz, "foo", nil) + checkDenyPeeringRead(t, authz, "foo", nil) + checkDenyPeeringWrite(t, authz, "foo", nil) checkDenyOperatorRead(t, authz, "foo", nil) checkDenyOperatorWrite(t, authz, "foo", nil) checkDenyPreparedQueryRead(t, authz, "foo", nil) @@ -192,6 +202,8 @@ func TestChainedAuthorizer(t *testing.T) { checkAllowNodeWrite(t, authz, "foo", nil) checkAllowMeshRead(t, authz, "foo", nil) checkAllowMeshWrite(t, authz, "foo", nil) + checkAllowPeeringRead(t, authz, "foo", nil) + checkAllowPeeringWrite(t, authz, "foo", nil) checkAllowOperatorRead(t, authz, "foo", nil) checkAllowOperatorWrite(t, authz, "foo", nil) checkAllowPreparedQueryRead(t, authz, "foo", nil) @@ -224,6 +236,8 @@ func TestChainedAuthorizer(t *testing.T) { checkDenyNodeWrite(t, authz, "foo", nil) checkDenyMeshRead(t, authz, "foo", nil) checkDenyMeshWrite(t, authz, "foo", nil) + checkDenyPeeringRead(t, authz, "foo", nil) + checkDenyPeeringWrite(t, authz, "foo", nil) checkDenyOperatorRead(t, authz, "foo", nil) checkDenyOperatorWrite(t, authz, "foo", nil) checkDenyPreparedQueryRead(t, authz, "foo", nil) @@ -254,6 +268,8 @@ func TestChainedAuthorizer(t *testing.T) { checkAllowNodeWrite(t, authz, "foo", nil) checkAllowMeshRead(t, authz, "foo", nil) checkAllowMeshWrite(t, authz, "foo", nil) + checkAllowPeeringRead(t, authz, "foo", nil) + checkAllowPeeringWrite(t, authz, "foo", nil) checkAllowOperatorRead(t, authz, "foo", nil) checkAllowOperatorWrite(t, authz, "foo", nil) checkAllowPreparedQueryRead(t, authz, "foo", nil) diff --git a/acl/policy.go b/acl/policy.go index d4ebd5976..59c3df8b3 100644 --- a/acl/policy.go +++ b/acl/policy.go @@ -85,6 +85,7 @@ type PolicyRules struct { Keyring string `hcl:"keyring"` Operator string `hcl:"operator"` Mesh string `hcl:"mesh"` + Peering string `hcl:"peering"` } // Policy is used to represent the policy specified by an ACL configuration. 
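Taken together, the ACL hunks above introduce a dedicated `peering` rule: `PeeringRead`/`PeeringWrite` are added to the `Authorizer` interface and its implementations, `Enforce` now routes the `peering` resource to those methods instead of the operator ones, and (per the policy authorizer changes and the o:/p: test matrix) access falls back to the `operator` rule when `peering` is unset. The sketch below is not part of this diff; it only illustrates the expected behavior, assuming an empty `acl.Config` and a nil enterprise meta are acceptable to `NewAuthorizerFromRules`:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/consul/acl"
)

func main() {
	// An explicit peering rule: read is allowed, write is denied.
	authz, err := acl.NewAuthorizerFromRules(`peering = "read"`, acl.SyntaxCurrent, &acl.Config{}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(authz.PeeringRead(nil))  // expected: Allow
	fmt.Println(authz.PeeringWrite(nil)) // expected: Deny

	// No peering rule: peering access falls back to the operator rule,
	// mirroring the "o:write, p: = write" test case above.
	fallback, err := acl.NewAuthorizerFromRules(`operator = "write"`, acl.SyntaxCurrent, &acl.Config{}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(fallback.PeeringWrite(nil)) // expected: Allow

	// Enforce now resolves the "peering" resource via PeeringRead/PeeringWrite
	// rather than OperatorRead/OperatorWrite.
	dec, err := acl.Enforce(authz, acl.ResourcePeering, "", "write", nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(dec) // expected: Deny
}
```

The operator fallback appears intended to keep existing operator-scoped tokens working against peering endpoints until a finer-grained `peering` rule is granted explicitly.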
@@ -289,6 +290,10 @@ func (pr *PolicyRules) Validate(conf *Config) error { return fmt.Errorf("Invalid mesh policy: %#v", pr.Mesh) } + // Validate the peering policy - this one is allowed to be empty + if pr.Peering != "" && !isPolicyValid(pr.Peering, false) { + return fmt.Errorf("Invalid peering policy: %#v", pr.Peering) + } return nil } @@ -309,6 +314,7 @@ func parseCurrent(rules string, conf *Config, meta *EnterprisePolicyMeta) (*Poli return p, nil } +// TODO(ACL-Legacy-Compat): remove in phase 2 func parseLegacy(rules string, conf *Config) (*Policy, error) { p := &Policy{} @@ -436,6 +442,7 @@ func NewPolicyFromSource(rules string, syntax SyntaxVersion, conf *Config, meta var policy *Policy var err error switch syntax { + // TODO(ACL-Legacy-Compat): remove and remove as argument from function case SyntaxLegacy: policy, err = parseLegacy(rules, conf) case SyntaxCurrent: diff --git a/acl/policy_authorizer.go b/acl/policy_authorizer.go index 3b79a6316..4a24b6bd0 100644 --- a/acl/policy_authorizer.go +++ b/acl/policy_authorizer.go @@ -43,6 +43,9 @@ type policyAuthorizer struct { // meshRule contains the mesh policies. meshRule *policyAuthorizerRule + // peeringRule contains the peering policies. + peeringRule *policyAuthorizerRule + // embedded enterprise policy authorizer enterprisePolicyAuthorizer } @@ -322,6 +325,15 @@ func (p *policyAuthorizer) loadRules(policy *PolicyRules) error { p.meshRule = &policyAuthorizerRule{access: access} } + // Load the peering policy + if policy.Peering != "" { + access, err := AccessLevelFromString(policy.Peering) + if err != nil { + return err + } + p.peeringRule = &policyAuthorizerRule{access: access} + } + return nil } @@ -692,6 +704,25 @@ func (p *policyAuthorizer) MeshWrite(ctx *AuthorizerContext) EnforcementDecision return p.OperatorWrite(ctx) } +// PeeringRead determines if the read-only peering functions are allowed. +func (p *policyAuthorizer) PeeringRead(ctx *AuthorizerContext) EnforcementDecision { + if p.peeringRule != nil { + return enforce(p.peeringRule.access, AccessRead) + } + // default to OperatorRead access + return p.OperatorRead(ctx) +} + +// PeeringWrite determines if the state-changing peering functions are +// allowed. +func (p *policyAuthorizer) PeeringWrite(ctx *AuthorizerContext) EnforcementDecision { + if p.peeringRule != nil { + return enforce(p.peeringRule.access, AccessWrite) + } + // default to OperatorWrite access + return p.OperatorWrite(ctx) +} + // OperatorRead determines if the read-only operator functions are allowed. 
func (p *policyAuthorizer) OperatorRead(*AuthorizerContext) EnforcementDecision { if p.operatorRule != nil { diff --git a/acl/policy_authorizer_test.go b/acl/policy_authorizer_test.go index d2f69a4eb..57f41993a 100644 --- a/acl/policy_authorizer_test.go +++ b/acl/policy_authorizer_test.go @@ -50,6 +50,8 @@ func TestPolicyAuthorizer(t *testing.T) { {name: "DefaultNodeWrite", prefix: "foo", check: checkDefaultNodeWrite}, {name: "DefaultMeshRead", prefix: "foo", check: checkDefaultMeshRead}, {name: "DefaultMeshWrite", prefix: "foo", check: checkDefaultMeshWrite}, + {name: "DefaultPeeringRead", prefix: "foo", check: checkDefaultPeeringRead}, + {name: "DefaultPeeringWrite", prefix: "foo", check: checkDefaultPeeringWrite}, {name: "DefaultOperatorRead", prefix: "foo", check: checkDefaultOperatorRead}, {name: "DefaultOperatorWrite", prefix: "foo", check: checkDefaultOperatorWrite}, {name: "DefaultPreparedQueryRead", prefix: "foo", check: checkDefaultPreparedQueryRead}, diff --git a/acl/policy_merger.go b/acl/policy_merger.go index d4a454bc1..3a617aa1e 100644 --- a/acl/policy_merger.go +++ b/acl/policy_merger.go @@ -10,6 +10,7 @@ type policyRulesMergeContext struct { keyRules map[string]*KeyRule keyPrefixRules map[string]*KeyRule meshRule string + peeringRule string nodeRules map[string]*NodeRule nodePrefixRules map[string]*NodeRule operatorRule string @@ -33,6 +34,7 @@ func (p *policyRulesMergeContext) init() { p.keyRules = make(map[string]*KeyRule) p.keyPrefixRules = make(map[string]*KeyRule) p.meshRule = "" + p.peeringRule = "" p.nodeRules = make(map[string]*NodeRule) p.nodePrefixRules = make(map[string]*NodeRule) p.operatorRule = "" @@ -119,10 +121,6 @@ func (p *policyRulesMergeContext) merge(policy *PolicyRules) { } } - if takesPrecedenceOver(policy.Mesh, p.meshRule) { - p.meshRule = policy.Mesh - } - for _, np := range policy.Nodes { update := true if permission, found := p.nodeRules[np.Name]; found { @@ -145,6 +143,14 @@ func (p *policyRulesMergeContext) merge(policy *PolicyRules) { } } + if takesPrecedenceOver(policy.Mesh, p.meshRule) { + p.meshRule = policy.Mesh + } + + if takesPrecedenceOver(policy.Peering, p.peeringRule) { + p.peeringRule = policy.Peering + } + if takesPrecedenceOver(policy.Operator, p.operatorRule) { p.operatorRule = policy.Operator } @@ -235,6 +241,7 @@ func (p *policyRulesMergeContext) fill(merged *PolicyRules) { merged.Keyring = p.keyringRule merged.Operator = p.operatorRule merged.Mesh = p.meshRule + merged.Peering = p.peeringRule // All the for loop appends are ugly but Go doesn't have a way to get // a slice of all values within a map so this is necessary diff --git a/acl/policy_test.go b/acl/policy_test.go index 5416eb557..362451e98 100644 --- a/acl/policy_test.go +++ b/acl/policy_test.go @@ -65,6 +65,7 @@ func TestPolicySourceParse(t *testing.T) { } operator = "deny" mesh = "deny" + peering = "deny" service_prefix "" { policy = "write" } @@ -147,6 +148,7 @@ func TestPolicySourceParse(t *testing.T) { }, "operator": "deny", "mesh": "deny", + "peering": "deny", "service_prefix": { "": { "policy": "write" @@ -253,6 +255,7 @@ func TestPolicySourceParse(t *testing.T) { }, Operator: PolicyDeny, Mesh: PolicyDeny, + Peering: PolicyDeny, PreparedQueryPrefixes: []*PreparedQueryRule{ { Prefix: "", @@ -743,6 +746,13 @@ func TestPolicySourceParse(t *testing.T) { RulesJSON: `{ "mesh": "nope" }`, Err: "Invalid mesh policy", }, + { + Name: "Bad Policy - Peering", + Syntax: SyntaxCurrent, + Rules: `peering = "nope"`, + RulesJSON: `{ "peering": "nope" }`, + Err: "Invalid peering 
policy", + }, { Name: "Keyring Empty", Syntax: SyntaxCurrent, @@ -764,6 +774,13 @@ func TestPolicySourceParse(t *testing.T) { RulesJSON: `{ "mesh": "" }`, Expected: &Policy{PolicyRules: PolicyRules{Mesh: ""}}, }, + { + Name: "Peering Empty", + Syntax: SyntaxCurrent, + Rules: `peering = ""`, + RulesJSON: `{ "peering": "" }`, + Expected: &Policy{PolicyRules: PolicyRules{Peering: ""}}, + }, } for _, tc := range cases { @@ -1453,66 +1470,90 @@ func TestMergePolicies(t *testing.T) { { name: "Write Precedence", input: []*Policy{ - {PolicyRules: PolicyRules{ - ACL: PolicyRead, - Keyring: PolicyRead, - Operator: PolicyRead, - Mesh: PolicyRead, - }}, - {PolicyRules: PolicyRules{ + { + PolicyRules: PolicyRules{ + ACL: PolicyRead, + Keyring: PolicyRead, + Operator: PolicyRead, + Mesh: PolicyRead, + Peering: PolicyRead, + }, + }, + { + PolicyRules: PolicyRules{ + ACL: PolicyWrite, + Keyring: PolicyWrite, + Operator: PolicyWrite, + Mesh: PolicyWrite, + Peering: PolicyWrite, + }, + }, + }, + expected: &Policy{ + PolicyRules: PolicyRules{ ACL: PolicyWrite, Keyring: PolicyWrite, Operator: PolicyWrite, Mesh: PolicyWrite, - }}, + Peering: PolicyWrite, + }, }, - expected: &Policy{PolicyRules: PolicyRules{ - ACL: PolicyWrite, - Keyring: PolicyWrite, - Operator: PolicyWrite, - Mesh: PolicyWrite, - }}, }, { name: "Deny Precedence", input: []*Policy{ - {PolicyRules: PolicyRules{ - ACL: PolicyWrite, - Keyring: PolicyWrite, - Operator: PolicyWrite, - Mesh: PolicyWrite, - }}, - {PolicyRules: PolicyRules{ + { + PolicyRules: PolicyRules{ + ACL: PolicyWrite, + Keyring: PolicyWrite, + Operator: PolicyWrite, + Mesh: PolicyWrite, + Peering: PolicyWrite, + }, + }, + { + PolicyRules: PolicyRules{ + ACL: PolicyDeny, + Keyring: PolicyDeny, + Operator: PolicyDeny, + Mesh: PolicyDeny, + Peering: PolicyDeny, + }, + }, + }, + expected: &Policy{ + PolicyRules: PolicyRules{ ACL: PolicyDeny, Keyring: PolicyDeny, Operator: PolicyDeny, Mesh: PolicyDeny, - }}, + Peering: PolicyDeny, + }, }, - expected: &Policy{PolicyRules: PolicyRules{ - ACL: PolicyDeny, - Keyring: PolicyDeny, - Operator: PolicyDeny, - Mesh: PolicyDeny, - }}, }, { name: "Read Precedence", input: []*Policy{ - {PolicyRules: PolicyRules{ + { + PolicyRules: PolicyRules{ + ACL: PolicyRead, + Keyring: PolicyRead, + Operator: PolicyRead, + Mesh: PolicyRead, + Peering: PolicyRead, + }, + }, + {}, + }, + expected: &Policy{ + PolicyRules: PolicyRules{ ACL: PolicyRead, Keyring: PolicyRead, Operator: PolicyRead, Mesh: PolicyRead, - }}, - {}, + Peering: PolicyRead, + }, }, - expected: &Policy{PolicyRules: PolicyRules{ - ACL: PolicyRead, - Keyring: PolicyRead, - Operator: PolicyRead, - Mesh: PolicyRead, - }}, }, } @@ -1524,6 +1565,7 @@ func TestMergePolicies(t *testing.T) { require.Equal(t, exp.Keyring, act.Keyring) require.Equal(t, exp.Operator, act.Operator) require.Equal(t, exp.Mesh, act.Mesh) + require.Equal(t, exp.Peering, act.Peering) require.ElementsMatch(t, exp.Agents, act.Agents) require.ElementsMatch(t, exp.AgentPrefixes, act.AgentPrefixes) require.ElementsMatch(t, exp.Events, act.Events) @@ -1597,6 +1639,9 @@ operator = "write" # comment mesh = "write" + +# comment +peering = "write" ` expected := ` @@ -1652,6 +1697,9 @@ operator = "write" # comment mesh = "write" + +# comment +peering = "write" ` output, err := TranslateLegacyRules([]byte(input)) diff --git a/acl/static_authorizer.go b/acl/static_authorizer.go index 951b026f3..07cc84511 100644 --- a/acl/static_authorizer.go +++ b/acl/static_authorizer.go @@ -170,6 +170,20 @@ func (s *staticAuthorizer) 
MeshWrite(*AuthorizerContext) EnforcementDecision { return Deny } +func (s *staticAuthorizer) PeeringRead(*AuthorizerContext) EnforcementDecision { + if s.defaultAllow { + return Allow + } + return Deny +} + +func (s *staticAuthorizer) PeeringWrite(*AuthorizerContext) EnforcementDecision { + if s.defaultAllow { + return Allow + } + return Deny +} + func (s *staticAuthorizer) OperatorRead(*AuthorizerContext) EnforcementDecision { if s.defaultAllow { return Allow diff --git a/agent/acl_endpoint_test.go b/agent/acl_endpoint_test.go index 60a512ef4..5cffef6ee 100644 --- a/agent/acl_endpoint_test.go +++ b/agent/acl_endpoint_test.go @@ -2044,6 +2044,14 @@ func TestACL_Authorize(t *testing.T) { Resource: "mesh", Access: "write", }, + { + Resource: "peering", + Access: "read", + }, + { + Resource: "peering", + Access: "write", + }, { Resource: "query", Segment: "foo", @@ -2186,6 +2194,14 @@ func TestACL_Authorize(t *testing.T) { Resource: "mesh", Access: "write", }, + { + Resource: "peering", + Access: "read", + }, + { + Resource: "peering", + Access: "write", + }, { Resource: "query", Segment: "foo", @@ -2238,6 +2254,8 @@ func TestACL_Authorize(t *testing.T) { true, // operator:write true, // mesh:read true, // mesh:write + true, // peering:read + true, // peering:write false, // query:read false, // query:write true, // service:read diff --git a/agent/acl_test.go b/agent/acl_test.go index 2e8664c9f..79cc5f7b7 100644 --- a/agent/acl_test.go +++ b/agent/acl_test.go @@ -274,10 +274,10 @@ func TestACL_vetServiceRegister(t *testing.T) { // Try to register over a service without write privs to the existing // service. - a.State.AddService(&structs.NodeService{ + a.State.AddServiceWithChecks(&structs.NodeService{ ID: "my-service", Service: "other", - }, "") + }, nil, "") err = a.vetServiceRegister(serviceRWSecret, &structs.NodeService{ ID: "my-service", Service: "service", @@ -304,10 +304,10 @@ func TestACL_vetServiceUpdateWithAuthorizer(t *testing.T) { require.Contains(t, err.Error(), "Unknown service") // Update with write privs. - a.State.AddService(&structs.NodeService{ + a.State.AddServiceWithChecks(&structs.NodeService{ ID: "my-service", Service: "service", - }, "") + }, nil, "") err = vetServiceUpdate(serviceRWSecret, structs.NewServiceID("my-service", nil)) require.NoError(t, err) @@ -361,10 +361,10 @@ func TestACL_vetCheckRegisterWithAuthorizer(t *testing.T) { // Try to register over a service check without write privs to the // existing service. - a.State.AddService(&structs.NodeService{ + a.State.AddServiceWithChecks(&structs.NodeService{ ID: "my-service", Service: "service", - }, "") + }, nil, "") a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-check"), ServiceID: "my-service", @@ -410,10 +410,10 @@ func TestACL_vetCheckUpdateWithAuthorizer(t *testing.T) { require.Contains(t, err.Error(), "Unknown check") // Update service check with write privs. - a.State.AddService(&structs.NodeService{ + a.State.AddServiceWithChecks(&structs.NodeService{ ID: "my-service", Service: "service", - }, "") + }, nil, "") a.State.AddCheck(&structs.HealthCheck{ CheckID: types.CheckID("my-service-check"), ServiceID: "my-service", diff --git a/agent/agent.go b/agent/agent.go index 765c1ab91..197434e77 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -761,12 +761,7 @@ func (a *Agent) Failed() <-chan struct{} { } func (a *Agent) buildExternalGRPCServer() { - // TLS is only enabled on the gRPC server if there's an HTTPS port configured. 
- var tls *tlsutil.Configurator - if a.config.HTTPSPort > 0 { - tls = a.tlsConfigurator - } - a.externalGRPCServer = external.NewServer(a.logger.Named("grpc.external"), tls) + a.externalGRPCServer = external.NewServer(a.logger.Named("grpc.external"), a.tlsConfigurator) } func (a *Agent) listenAndServeGRPC() error { @@ -1346,6 +1341,8 @@ func newConsulConfig(runtimeCfg *config.RuntimeConfig, logger hclog.Logger) (*co // function does not drift. cfg.SerfLANConfig = consul.CloneSerfLANConfig(cfg.SerfLANConfig) + cfg.PeeringEnabled = runtimeCfg.PeeringEnabled + enterpriseConsulConfig(cfg, runtimeCfg) return cfg, nil } @@ -4075,6 +4072,7 @@ func (a *Agent) registerCache() { a.cache.RegisterType(cachetype.IntentionMatchName, &cachetype.IntentionMatch{RPC: a}) a.cache.RegisterType(cachetype.IntentionUpstreamsName, &cachetype.IntentionUpstreams{RPC: a}) + a.cache.RegisterType(cachetype.IntentionUpstreamsDestinationName, &cachetype.IntentionUpstreamsDestination{RPC: a}) a.cache.RegisterType(cachetype.CatalogServicesName, &cachetype.CatalogServices{RPC: a}) @@ -4097,6 +4095,7 @@ func (a *Agent) registerCache() { a.cache.RegisterType(cachetype.CompiledDiscoveryChainName, &cachetype.CompiledDiscoveryChain{RPC: a}) a.cache.RegisterType(cachetype.GatewayServicesName, &cachetype.GatewayServices{RPC: a}) + a.cache.RegisterType(cachetype.ServiceGatewaysName, &cachetype.ServiceGateways{RPC: a}) a.cache.RegisterType(cachetype.ConfigEntryListName, &cachetype.ConfigEntryList{RPC: a}) @@ -4220,10 +4219,12 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources { Datacenters: proxycfgglue.CacheDatacenters(a.cache), FederationStateListMeshGateways: proxycfgglue.CacheFederationStateListMeshGateways(a.cache), GatewayServices: proxycfgglue.CacheGatewayServices(a.cache), - Health: proxycfgglue.Health(a.rpcClientHealth), + ServiceGateways: proxycfgglue.CacheServiceGateways(a.cache), + Health: proxycfgglue.ClientHealth(a.rpcClientHealth), HTTPChecks: proxycfgglue.CacheHTTPChecks(a.cache), Intentions: proxycfgglue.CacheIntentions(a.cache), IntentionUpstreams: proxycfgglue.CacheIntentionUpstreams(a.cache), + IntentionUpstreamsDestination: proxycfgglue.CacheIntentionUpstreamsDestination(a.cache), InternalServiceDump: proxycfgglue.CacheInternalServiceDump(a.cache), LeafCertificate: proxycfgglue.CacheLeafCertificate(a.cache), PeeredUpstreams: proxycfgglue.CachePeeredUpstreams(a.cache), @@ -4237,6 +4238,7 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources { if server, ok := a.delegate.(*consul.Server); ok { deps := proxycfgglue.ServerDataSourceDeps{ + Datacenter: a.config.Datacenter, EventPublisher: a.baseDeps.EventPublisher, ViewStore: a.baseDeps.ViewStore, Logger: a.logger.Named("proxycfg.server-data-sources"), @@ -4245,8 +4247,17 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources { } sources.ConfigEntry = proxycfgglue.ServerConfigEntry(deps) sources.ConfigEntryList = proxycfgglue.ServerConfigEntryList(deps) + sources.CompiledDiscoveryChain = proxycfgglue.ServerCompiledDiscoveryChain(deps, proxycfgglue.CacheCompiledDiscoveryChain(a.cache)) + sources.ExportedPeeredServices = proxycfgglue.ServerExportedPeeredServices(deps) + sources.FederationStateListMeshGateways = proxycfgglue.ServerFederationStateListMeshGateways(deps) + sources.GatewayServices = proxycfgglue.ServerGatewayServices(deps) + sources.Health = proxycfgglue.ServerHealth(deps, proxycfgglue.ClientHealth(a.rpcClientHealth)) sources.Intentions = proxycfgglue.ServerIntentions(deps) sources.IntentionUpstreams = 
proxycfgglue.ServerIntentionUpstreams(deps) + sources.PeeredUpstreams = proxycfgglue.ServerPeeredUpstreams(deps) + sources.ServiceList = proxycfgglue.ServerServiceList(deps, proxycfgglue.CacheServiceList(a.cache)) + sources.TrustBundle = proxycfgglue.ServerTrustBundle(deps) + sources.TrustBundleList = proxycfgglue.ServerTrustBundleList(deps) } a.fillEnterpriseProxyDataSources(&sources) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 7bde62387..270cc7dc1 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -93,7 +93,7 @@ func TestAgent_Services(t *testing.T) { }, Port: 5000, } - require.NoError(t, a.State.AddService(srv1, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv1, nil, "")) req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() @@ -128,7 +128,7 @@ func TestAgent_ServicesFiltered(t *testing.T) { }, Port: 5000, } - require.NoError(t, a.State.AddService(srv1, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv1, nil, "")) // Add another service srv2 := &structs.NodeService{ @@ -140,7 +140,7 @@ func TestAgent_ServicesFiltered(t *testing.T) { }, Port: 1234, } - require.NoError(t, a.State.AddService(srv2, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv2, nil, "")) req, _ := http.NewRequest("GET", "/v1/agent/services?filter="+url.QueryEscape("foo in Meta"), nil) resp := httptest.NewRecorder() @@ -188,7 +188,7 @@ func TestAgent_Services_ExternalConnectProxy(t *testing.T) { Upstreams: structs.TestUpstreams(t), }, } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() @@ -232,7 +232,7 @@ func TestAgent_Services_Sidecar(t *testing.T) { }, }, } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() @@ -281,7 +281,7 @@ func TestAgent_Services_MeshGateway(t *testing.T) { }, }, } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() @@ -325,7 +325,7 @@ func TestAgent_Services_TerminatingGateway(t *testing.T) { }, }, } - require.NoError(t, a.State.AddService(srv1, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv1, nil, "")) req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() @@ -370,7 +370,7 @@ func TestAgent_Services_ACLFilter(t *testing.T) { }, } for _, s := range services { - a.State.AddService(s, "") + a.State.AddServiceWithChecks(s, nil, "") } t.Run("no token", func(t *testing.T) { @@ -7994,7 +7994,7 @@ func TestAgent_Services_ExposeConfig(t *testing.T) { }, }, } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") req, _ := http.NewRequest("GET", "/v1/agent/services", nil) resp := httptest.NewRecorder() diff --git a/agent/cache-types/service_gateways.go b/agent/cache-types/service_gateways.go new file mode 100644 index 000000000..1c7a8e855 --- /dev/null +++ b/agent/cache-types/service_gateways.go @@ -0,0 +1,52 @@ +package cachetype + +import ( + "fmt" + + "github.com/hashicorp/consul/agent/cache" + "github.com/hashicorp/consul/agent/structs" +) + +// Recommended name for registration. +const ServiceGatewaysName = "service-gateways" + +// GatewayUpstreams supports fetching upstreams for a given gateway name. 
+type ServiceGateways struct { + RegisterOptionsBlockingRefresh + RPC RPC +} + +func (g *ServiceGateways) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) { + var result cache.FetchResult + + // The request should be a ServiceSpecificRequest. + reqReal, ok := req.(*structs.ServiceSpecificRequest) + if !ok { + return result, fmt.Errorf( + "Internal cache failure: request wrong type: %T", req) + } + + // Lightweight copy this object so that manipulating QueryOptions doesn't race. + dup := *reqReal + reqReal = &dup + + // Set the minimum query index to our current index so we block + reqReal.QueryOptions.MinQueryIndex = opts.MinIndex + reqReal.QueryOptions.MaxQueryTime = opts.Timeout + + // Always allow stale - there's no point in hitting leader if the request is + // going to be served from cache and end up arbitrarily stale anyway. This + // allows cached service-discover to automatically read scale across all + // servers too. + reqReal.AllowStale = true + + // Fetch + var reply structs.IndexedCheckServiceNodes + if err := g.RPC.RPC("Internal.ServiceGateways", reqReal, &reply); err != nil { + return result, err + } + + result.Value = &reply + result.Index = reply.QueryMeta.Index + return result, nil +} diff --git a/agent/cache-types/service_gateways_test.go b/agent/cache-types/service_gateways_test.go new file mode 100644 index 000000000..39c6b474d --- /dev/null +++ b/agent/cache-types/service_gateways_test.go @@ -0,0 +1,57 @@ +package cachetype + +import ( + "testing" + "time" + + "github.com/hashicorp/consul/agent/cache" + "github.com/hashicorp/consul/agent/structs" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestServiceGateways(t *testing.T) { + rpc := TestRPC(t) + typ := &ServiceGateways{RPC: rpc} + + // Expect the proper RPC call. This also sets the expected value + // since that is return-by-pointer in the arguments. + var resp *structs.IndexedCheckServiceNodes + rpc.On("RPC", "Internal.ServiceGateways", mock.Anything, mock.Anything).Return(nil). + Run(func(args mock.Arguments) { + req := args.Get(1).(*structs.ServiceSpecificRequest) + require.Equal(t, uint64(24), req.QueryOptions.MinQueryIndex) + require.Equal(t, 1*time.Second, req.QueryOptions.MaxQueryTime) + require.True(t, req.AllowStale) + require.Equal(t, "foo", req.ServiceName) + + nodes := []structs.CheckServiceNode{ + { + Service: &structs.NodeService{ + Tags: req.ServiceTags, + }, + }, + } + + reply := args.Get(2).(*structs.IndexedCheckServiceNodes) + reply.Nodes = nodes + reply.QueryMeta.Index = 48 + resp = reply + }) + + // Fetch + resultA, err := typ.Fetch(cache.FetchOptions{ + MinIndex: 24, + Timeout: 1 * time.Second, + }, &structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "foo", + }) + require.NoError(t, err) + require.Equal(t, cache.FetchResult{ + Value: resp, + Index: 48, + }, resultA) + + rpc.AssertExpectations(t) +} diff --git a/agent/cache-types/trust_bundle.go b/agent/cache-types/trust_bundle.go index 16b8f204b..48dad6437 100644 --- a/agent/cache-types/trust_bundle.go +++ b/agent/cache-types/trust_bundle.go @@ -3,16 +3,53 @@ package cachetype import ( "context" "fmt" + "strconv" + "time" + "github.com/mitchellh/hashstructure" "google.golang.org/grpc" "github.com/hashicorp/consul/agent/cache" + external "github.com/hashicorp/consul/agent/grpc-external" + "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/proto/pbpeering" ) // Recommended name for registration. 
const TrustBundleReadName = "peer-trust-bundle" +type TrustBundleReadRequest struct { + Request *pbpeering.TrustBundleReadRequest + structs.QueryOptions +} + +func (r *TrustBundleReadRequest) CacheInfo() cache.RequestInfo { + info := cache.RequestInfo{ + Token: r.Token, + Datacenter: "", + MinIndex: 0, + Timeout: 0, + MustRevalidate: false, + + // OPTIMIZE(peering): Cache.notifyPollingQuery polls at this interval. We need to revisit how that polling works. + // Using an exponential backoff when the result hasn't changed may be preferable. + MaxAge: 1 * time.Second, + } + + v, err := hashstructure.Hash([]interface{}{ + r.Request.Partition, + r.Request.Name, + }, nil) + if err == nil { + // If there is an error, we don't set the key. A blank key forces + // no cache for this request so the request is forwarded directly + // to the server. + info.Key = strconv.FormatUint(v, 10) + } + + return info +} + // TrustBundle supports fetching discovering service instances via prepared // queries. type TrustBundle struct { @@ -33,14 +70,20 @@ func (t *TrustBundle) Fetch(_ cache.FetchOptions, req cache.Request) (cache.Fetc // The request should be a TrustBundleReadRequest. // We do not need to make a copy of this request type like in other cache types // because the RequestInfo is synthetic. - reqReal, ok := req.(*pbpeering.TrustBundleReadRequest) + reqReal, ok := req.(*TrustBundleReadRequest) if !ok { return result, fmt.Errorf( "Internal cache failure: request wrong type: %T", req) } + // Always allow stale - there's no point in hitting leader if the request is + // going to be served from cache and end up arbitrarily stale anyway. This + // allows cached service-discover to automatically read scale across all + // servers too. + reqReal.QueryOptions.SetAllowStale(true) + // Fetch - reply, err := t.Client.TrustBundleRead(context.Background(), reqReal) + reply, err := t.Client.TrustBundleRead(external.ContextWithToken(context.Background(), reqReal.Token), reqReal.Request) if err != nil { return result, err } diff --git a/agent/cache-types/trust_bundle_test.go b/agent/cache-types/trust_bundle_test.go index fa3d016a2..ee03838aa 100644 --- a/agent/cache-types/trust_bundle_test.go +++ b/agent/cache-types/trust_bundle_test.go @@ -33,8 +33,10 @@ func TestTrustBundle(t *testing.T) { Return(resp, nil) // Fetch and assert against the result. 
- result, err := typ.Fetch(cache.FetchOptions{}, &pbpeering.TrustBundleReadRequest{ - Name: "foo", + result, err := typ.Fetch(cache.FetchOptions{}, &TrustBundleReadRequest{ + Request: &pbpeering.TrustBundleReadRequest{ + Name: "foo", + }, }) require.NoError(t, err) require.Equal(t, cache.FetchResult{ @@ -82,7 +84,9 @@ func TestTrustBundle_MultipleUpdates(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) t.Cleanup(cancel) - err := c.Notify(ctx, TrustBundleReadName, &pbpeering.TrustBundleReadRequest{Name: "foo"}, "updates", ch) + err := c.Notify(ctx, TrustBundleReadName, &TrustBundleReadRequest{ + Request: &pbpeering.TrustBundleReadRequest{Name: "foo"}, + }, "updates", ch) require.NoError(t, err) i := uint64(1) diff --git a/agent/cache-types/trust_bundles.go b/agent/cache-types/trust_bundles.go index 5b4bbcc13..70c63cb4b 100644 --- a/agent/cache-types/trust_bundles.go +++ b/agent/cache-types/trust_bundles.go @@ -3,16 +3,55 @@ package cachetype import ( "context" "fmt" + "strconv" + "time" + "github.com/mitchellh/hashstructure" "google.golang.org/grpc" "github.com/hashicorp/consul/agent/cache" + external "github.com/hashicorp/consul/agent/grpc-external" + "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/proto/pbpeering" ) // Recommended name for registration. const TrustBundleListName = "trust-bundles" +type TrustBundleListRequest struct { + Request *pbpeering.TrustBundleListByServiceRequest + structs.QueryOptions +} + +func (r *TrustBundleListRequest) CacheInfo() cache.RequestInfo { + info := cache.RequestInfo{ + Token: r.Token, + Datacenter: "", + MinIndex: 0, + Timeout: 0, + MustRevalidate: false, + + // OPTIMIZE(peering): Cache.notifyPollingQuery polls at this interval. We need to revisit how that polling works. + // Using an exponential backoff when the result hasn't changed may be preferable. + MaxAge: 1 * time.Second, + } + + v, err := hashstructure.Hash([]interface{}{ + r.Request.Partition, + r.Request.Namespace, + r.Request.ServiceName, + r.Request.Kind, + }, nil) + if err == nil { + // If there is an error, we don't set the key. A blank key forces + // no cache for this request so the request is forwarded directly + // to the server. + info.Key = strconv.FormatUint(v, 10) + } + + return info +} + // TrustBundles supports fetching discovering service instances via prepared // queries. type TrustBundles struct { @@ -30,17 +69,23 @@ type TrustBundleLister interface { func (t *TrustBundles) Fetch(_ cache.FetchOptions, req cache.Request) (cache.FetchResult, error) { var result cache.FetchResult - // The request should be a TrustBundleListByServiceRequest. + // The request should be a TrustBundleListRequest. // We do not need to make a copy of this request type like in other cache types // because the RequestInfo is synthetic. - reqReal, ok := req.(*pbpeering.TrustBundleListByServiceRequest) + reqReal, ok := req.(*TrustBundleListRequest) if !ok { return result, fmt.Errorf( "Internal cache failure: request wrong type: %T", req) } + // Always allow stale - there's no point in hitting leader if the request is + // going to be served from cache and end up arbitrarily stale anyway. This + // allows cached service-discover to automatically read scale across all + // servers too. 
+ reqReal.QueryOptions.SetAllowStale(true) + // Fetch - reply, err := t.Client.TrustBundleListByService(context.Background(), reqReal) + reply, err := t.Client.TrustBundleListByService(external.ContextWithToken(context.Background(), reqReal.Token), reqReal.Request) if err != nil { return result, err } diff --git a/agent/cache-types/trust_bundles_test.go b/agent/cache-types/trust_bundles_test.go index d5fbd6f50..09d8a80bc 100644 --- a/agent/cache-types/trust_bundles_test.go +++ b/agent/cache-types/trust_bundles_test.go @@ -36,8 +36,10 @@ func TestTrustBundles(t *testing.T) { Return(resp, nil) // Fetch and assert against the result. - result, err := typ.Fetch(cache.FetchOptions{}, &pbpeering.TrustBundleListByServiceRequest{ - ServiceName: "foo", + result, err := typ.Fetch(cache.FetchOptions{}, &TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ + ServiceName: "foo", + }, }) require.NoError(t, err) require.Equal(t, cache.FetchResult{ @@ -85,7 +87,9 @@ func TestTrustBundles_MultipleUpdates(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) t.Cleanup(cancel) - err := c.Notify(ctx, TrustBundleListName, &pbpeering.TrustBundleListByServiceRequest{ServiceName: "foo"}, "updates", ch) + err := c.Notify(ctx, TrustBundleListName, &TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ServiceName: "foo"}, + }, "updates", ch) require.NoError(t, err) i := uint64(1) diff --git a/agent/config/builder.go b/agent/config/builder.go index f855aae51..70c5d044c 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -1014,6 +1014,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) { NodeMeta: c.NodeMeta, NodeName: b.nodeName(c.NodeName), ReadReplica: boolVal(c.ReadReplica), + PeeringEnabled: boolVal(c.Peering.Enabled), PidFile: stringVal(c.PidFile), PrimaryDatacenter: primaryDatacenter, PrimaryGateways: b.expandAllOptionalAddrs("primary_gateways", c.PrimaryGateways), diff --git a/agent/config/config.go b/agent/config/config.go index c4f752a82..23e7550aa 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -197,6 +197,7 @@ type Config struct { NodeID *string `mapstructure:"node_id"` NodeMeta map[string]string `mapstructure:"node_meta"` NodeName *string `mapstructure:"node_name"` + Peering Peering `mapstructure:"peering"` Performance Performance `mapstructure:"performance"` PidFile *string `mapstructure:"pid_file"` Ports Ports `mapstructure:"ports"` @@ -887,3 +888,7 @@ type TLS struct { // config merging logic. GRPCModifiedByDeprecatedConfig *struct{} `mapstructure:"-"` } + +type Peering struct { + Enabled *bool `mapstructure:"enabled"` +} diff --git a/agent/config/default.go b/agent/config/default.go index 951d9f126..d0cc2865d 100644 --- a/agent/config/default.go +++ b/agent/config/default.go @@ -104,6 +104,9 @@ func DefaultSource() Source { kv_max_value_size = ` + strconv.FormatInt(raft.SuggestedMaxDataSize, 10) + ` txn_max_req_len = ` + strconv.FormatInt(raft.SuggestedMaxDataSize, 10) + ` } + peering = { + enabled = true + } performance = { leave_drain_time = "5s" raft_multiplier = ` + strconv.Itoa(int(consul.DefaultRaftMultiplier)) + ` diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 2ae9888ae..db46c2184 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -810,6 +810,14 @@ type RuntimeConfig struct { // flag: -non-voting-server ReadReplica bool + // PeeringEnabled enables cluster peering. This setting only applies for servers. 
+ // When disabled, all peering RPC endpoints will return errors, + // peering requests from other clusters will receive errors, and any peerings already stored in this server's + // state will be ignored. + // + // hcl: peering { enabled = (true|false) } + PeeringEnabled bool + // PidFile is the file to store our PID in. // // hcl: pid_file = string diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 0963ec07f..b05b31491 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -5548,6 +5548,16 @@ func TestLoad_IntegrationWithFlags(t *testing.T) { "tls.grpc was provided but TLS will NOT be enabled on the gRPC listener without an HTTPS listener configured (e.g. via ports.https)", }, }) + run(t, testCase{ + desc: "peering.enabled defaults to true", + args: []string{ + `-data-dir=` + dataDir, + }, + expected: func(rt *RuntimeConfig) { + rt.DataDir = dataDir + rt.PeeringEnabled = true + }, + }) } func (tc testCase) run(format string, dataDir string) func(t *testing.T) { @@ -5955,6 +5965,7 @@ func TestLoad_FullConfig(t *testing.T) { NodeMeta: map[string]string{"5mgGQMBk": "mJLtVMSG", "A7ynFMJB": "0Nx6RGab"}, NodeName: "otlLxGaI", ReadReplica: true, + PeeringEnabled: true, PidFile: "43xN80Km", PrimaryGateways: []string{"aej8eeZo", "roh2KahS"}, PrimaryGatewaysInterval: 18866 * time.Second, diff --git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index 25fbba0c0..b5d72f864 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -235,6 +235,7 @@ "NodeID": "", "NodeMeta": {}, "NodeName": "", + "PeeringEnabled": false, "PidFile": "", "PrimaryDatacenter": "", "PrimaryGateways": [ diff --git a/agent/config/testdata/full-config.hcl b/agent/config/testdata/full-config.hcl index bb544b54a..ed8203296 100644 --- a/agent/config/testdata/full-config.hcl +++ b/agent/config/testdata/full-config.hcl @@ -305,6 +305,9 @@ node_meta { node_name = "otlLxGaI" non_voting_server = true partition = "" +peering { + enabled = true +} performance { leave_drain_time = "8265s" raft_multiplier = 5 diff --git a/agent/config/testdata/full-config.json b/agent/config/testdata/full-config.json index 36f52e681..8294a27b7 100644 --- a/agent/config/testdata/full-config.json +++ b/agent/config/testdata/full-config.json @@ -305,6 +305,9 @@ "node_name": "otlLxGaI", "non_voting_server": true, "partition": "", + "peering": { + "enabled": true + }, "performance": { "leave_drain_time": "8265s", "raft_multiplier": 5, diff --git a/agent/consul/authmethod/awsauth/aws.go b/agent/consul/authmethod/awsauth/aws.go index f3995cdc5..7c7758476 100644 --- a/agent/consul/authmethod/awsauth/aws.go +++ b/agent/consul/authmethod/awsauth/aws.go @@ -4,9 +4,9 @@ import ( "context" "fmt" + iamauth "github.com/hashicorp/consul-awsauth" "github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/internal/iamauth" "github.com/hashicorp/go-hclog" ) diff --git a/agent/consul/authmethod/awsauth/aws_test.go b/agent/consul/authmethod/awsauth/aws_test.go index 3025275cf..031cd035b 100644 --- a/agent/consul/authmethod/awsauth/aws_test.go +++ b/agent/consul/authmethod/awsauth/aws_test.go @@ -8,10 +8,10 @@ import ( "testing" "github.com/aws/aws-sdk-go/aws/credentials" + iamauth "github.com/hashicorp/consul-awsauth" + "github.com/hashicorp/consul-awsauth/iamauthtest" 
"github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/internal/iamauth" - "github.com/hashicorp/consul/internal/iamauth/iamauthtest" "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/require" ) diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 6508ba220..5ab8fb12d 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -176,7 +176,7 @@ func servicePreApply(service *structs.NodeService, authz resolver.Result, authzC // Verify ServiceName provided if ID. if service.ID != "" && service.Service == "" { - return fmt.Errorf("Must provide service name with ID") + return fmt.Errorf("Must provide service name (Service.Service) when service ID is provided") } // Check the service address here and in the agent endpoint diff --git a/agent/consul/config.go b/agent/consul/config.go index 50235c681..469ccc919 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -396,6 +396,9 @@ type Config struct { RaftBoltDBConfig RaftBoltDBConfig + // PeeringEnabled enables cluster peering. + PeeringEnabled bool + // Embedded Consul Enterprise specific configuration *EnterpriseConfig } @@ -512,6 +515,8 @@ func DefaultConfig() *Config { DefaultQueryTime: 300 * time.Second, MaxQueryTime: 600 * time.Second, + PeeringEnabled: true, + EnterpriseConfig: DefaultEnterpriseConfig(), } diff --git a/agent/consul/config_endpoint_test.go b/agent/consul/config_endpoint_test.go index dc0c8d82f..bbed2bf1b 100644 --- a/agent/consul/config_endpoint_test.go +++ b/agent/consul/config_endpoint_test.go @@ -1141,8 +1141,8 @@ func TestConfigEntry_ResolveServiceConfig_TransparentProxy(t *testing.T) { Name: "foo", Mode: structs.ProxyModeTransparent, Destination: &structs.DestinationConfig{ - Address: "hello.world.com", - Port: 443, + Addresses: []string{"hello.world.com"}, + Port: 443, }, }, }, @@ -1153,8 +1153,8 @@ func TestConfigEntry_ResolveServiceConfig_TransparentProxy(t *testing.T) { expect: structs.ServiceConfigResponse{ Mode: structs.ProxyModeTransparent, Destination: structs.DestinationConfig{ - Address: "hello.world.com", - Port: 443, + Addresses: []string{"hello.world.com"}, + Port: 443, }, }, }, diff --git a/agent/consul/fsm/fsm.go b/agent/consul/fsm/fsm.go index 8fa617b45..432e64631 100644 --- a/agent/consul/fsm/fsm.go +++ b/agent/consul/fsm/fsm.go @@ -324,4 +324,11 @@ func (c *FSM) registerStreamSnapshotHandlers() { if err != nil { panic(fmt.Errorf("fatal error encountered registering streaming snapshot handlers: %w", err)) } + + err = c.deps.Publisher.RegisterHandler(state.EventTopicServiceList, func(req stream.SubscribeRequest, buf stream.SnapshotAppender) (uint64, error) { + return c.State().ServiceListSnapshot(req, buf) + }, true) + if err != nil { + panic(fmt.Errorf("fatal error encountered registering streaming snapshot handlers: %w", err)) + } } diff --git a/agent/consul/helper_test.go b/agent/consul/helper_test.go index 957653ad2..0f89856d7 100644 --- a/agent/consul/helper_test.go +++ b/agent/consul/helper_test.go @@ -1213,10 +1213,12 @@ func registerTestRoutingConfigTopologyEntries(t *testing.T, codec rpc.ClientCode func registerLocalAndRemoteServicesVIPEnabled(t *testing.T, state *state.Store) { t.Helper() - _, entry, err := state.SystemMetadataGet(nil, structs.SystemMetadataVirtualIPsEnabled) - require.NoError(t, err) - require.NotNil(t, entry) - require.Equal(t, "true", entry.Value) + retry.Run(t, func(r *retry.R) { + _, entry, err := 
state.SystemMetadataGet(nil, structs.SystemMetadataVirtualIPsEnabled) + require.NoError(r, err) + require.NotNil(r, entry) + require.Equal(r, "true", entry.Value) + }) // Register a local connect-native service require.NoError(t, state.EnsureRegistration(10, &structs.RegisterRequest{ @@ -1462,8 +1464,8 @@ func registerIntentionUpstreamEntries(t *testing.T, codec rpc.ClientCodec, token Kind: structs.ServiceDefaults, Name: "api.example.com", Destination: &structs.DestinationConfig{ - Address: "api.example.com", - Port: 443, + Addresses: []string{"api.example.com"}, + Port: 443, }, }, WriteRequest: structs.WriteRequest{Token: token}, @@ -1474,8 +1476,8 @@ func registerIntentionUpstreamEntries(t *testing.T, codec rpc.ClientCodec, token Kind: structs.ServiceDefaults, Name: "kafka.store.com", Destination: &structs.DestinationConfig{ - Address: "172.168.2.1", - Port: 9003, + Addresses: []string{"172.168.2.1"}, + Port: 9003, }, }, WriteRequest: structs.WriteRequest{Token: token}, diff --git a/agent/consul/internal_endpoint.go b/agent/consul/internal_endpoint.go index a041c7eeb..8f44c0f7a 100644 --- a/agent/consul/internal_endpoint.go +++ b/agent/consul/internal_endpoint.go @@ -453,6 +453,56 @@ func (m *Internal) GatewayServiceDump(args *structs.ServiceSpecificRequest, repl return err } +// ServiceGateways returns all the nodes for services associated with a gateway along with their gateway config +func (m *Internal) ServiceGateways(args *structs.ServiceSpecificRequest, reply *structs.IndexedCheckServiceNodes) error { + if done, err := m.srv.ForwardRPC("Internal.ServiceGateways", args, reply); done { + return err + } + + // Verify the arguments + if args.ServiceName == "" { + return fmt.Errorf("Must provide gateway name") + } + + var authzContext acl.AuthorizerContext + authz, err := m.srv.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext) + if err != nil { + return err + } + + if err := m.srv.validateEnterpriseRequest(&args.EnterpriseMeta, false); err != nil { + return err + } + + // We need read access to the service we're trying to find gateways for, so check that first. + if err := authz.ToAllowAuthorizer().ServiceReadAllowed(args.ServiceName, &authzContext); err != nil { + return err + } + + err = m.srv.blockingQuery( + &args.QueryOptions, + &reply.QueryMeta, + func(ws memdb.WatchSet, state *state.Store) error { + var maxIdx uint64 + idx, gateways, err := state.ServiceGateways(ws, args.ServiceName, args.ServiceKind, args.EnterpriseMeta) + if err != nil { + return err + } + if idx > maxIdx { + maxIdx = idx + } + + reply.Index, reply.Nodes = maxIdx, gateways + + if err := m.srv.filterACL(args.Token, reply); err != nil { + return err + } + return nil + }) + + return err +} + // GatewayIntentions Match returns the set of intentions that match the given source/destination. 
func (m *Internal) GatewayIntentions(args *structs.IntentionQueryRequest, reply *structs.IndexedIntentions) error { // Forward if necessary diff --git a/agent/consul/internal_endpoint_test.go b/agent/consul/internal_endpoint_test.go index 7d7d421c8..91d48601c 100644 --- a/agent/consul/internal_endpoint_test.go +++ b/agent/consul/internal_endpoint_test.go @@ -2782,6 +2782,10 @@ func TestInternal_PeeredUpstreams(t *testing.T) { t.Skip("too slow for testing.Short") } + orig := virtualIPVersionCheckInterval + virtualIPVersionCheckInterval = 50 * time.Millisecond + t.Cleanup(func() { virtualIPVersionCheckInterval = orig }) + t.Parallel() _, s1 := testServerWithConfig(t) @@ -2811,3 +2815,479 @@ func TestInternal_PeeredUpstreams(t *testing.T) { } require.Equal(t, expect, out.Services) } + +func TestInternal_ServiceGatewayService_Terminating(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Parallel() + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testrpc.WaitForTestAgent(t, s1.RPC, "dc1") + + db := structs.NodeService{ + ID: "db2", + Service: "db", + } + + redis := structs.NodeService{ + ID: "redis", + Service: "redis", + } + + // Register gateway and two service instances that will be associated with it + { + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "10.1.2.2", + Service: &structs.NodeService{ + ID: "terminating-gateway-01", + Service: "terminating-gateway", + Kind: structs.ServiceKindTerminatingGateway, + Port: 443, + Address: "198.18.1.3", + }, + Check: &structs.HealthCheck{ + Name: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway-01", + }, + } + var out struct{} + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + + arg = structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.2", + Service: &structs.NodeService{ + ID: "db", + Service: "db", + }, + Check: &structs.HealthCheck{ + Name: "db-warning", + Status: api.HealthWarning, + ServiceID: "db", + }, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + + arg = structs.RegisterRequest{ + Datacenter: "dc1", + Node: "baz", + Address: "127.0.0.3", + Service: &db, + Check: &structs.HealthCheck{ + Name: "db2-passing", + Status: api.HealthPassing, + ServiceID: "db2", + }, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + } + + // Register terminating-gateway config entry, linking it to db and redis (dne) + { + args := &structs.TerminatingGatewayConfigEntry{ + Name: "terminating-gateway", + Kind: structs.TerminatingGateway, + Services: []structs.LinkedService{ + { + Name: "db", + }, + { + Name: "redis", + CAFile: "/etc/certs/ca.pem", + CertFile: "/etc/certs/cert.pem", + KeyFile: "/etc/certs/key.pem", + }, + }, + } + + req := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc1", + Entry: args, + } + var configOutput bool + require.NoError(t, msgpackrpc.CallWithCodec(codec, "ConfigEntry.Apply", &req, &configOutput)) + require.True(t, configOutput) + } + + var out structs.IndexedCheckServiceNodes + req := structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "db", + ServiceKind: structs.ServiceKindTerminatingGateway, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Internal.ServiceGateways", &req, &out)) + + for _, n := range out.Nodes { + n.Node.RaftIndex = 
structs.RaftIndex{} + n.Service.RaftIndex = structs.RaftIndex{} + for _, m := range n.Checks { + m.RaftIndex = structs.RaftIndex{} + } + } + + expect := structs.CheckServiceNodes{ + structs.CheckServiceNode{ + Node: &structs.Node{ + Node: "foo", + RaftIndex: structs.RaftIndex{}, + Address: "10.1.2.2", + Datacenter: "dc1", + Partition: acl.DefaultPartitionName, + }, + Service: &structs.NodeService{ + Kind: structs.ServiceKindTerminatingGateway, + ID: "terminating-gateway-01", + Service: "terminating-gateway", + TaggedAddresses: map[string]structs.ServiceAddress{ + "consul-virtual:" + db.CompoundServiceName().String(): {Address: "240.0.0.1"}, + "consul-virtual:" + redis.CompoundServiceName().String(): {Address: "240.0.0.2"}, + }, + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + Tags: []string{}, + Meta: map[string]string{}, + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + RaftIndex: structs.RaftIndex{}, + Address: "198.18.1.3", + }, + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Name: "terminating connect", + Node: "foo", + CheckID: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway-01", + ServiceName: "terminating-gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + }, + }, + }, + } + + assert.Equal(t, expect, out.Nodes) +} + +func TestInternal_ServiceGatewayService_Terminating_ACL(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Parallel() + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.PrimaryDatacenter = "dc1" + c.ACLsEnabled = true + c.ACLInitialManagementToken = "root" + c.ACLResolverSettings.ACLDefaultPolicy = "deny" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testrpc.WaitForTestAgent(t, s1.RPC, "dc1", testrpc.WithToken("root")) + + // Create the ACL. 
+ token, err := upsertTestTokenWithPolicyRules(codec, "root", "dc1", ` + service "db" { policy = "read" } + service "terminating-gateway" { policy = "read" } + node_prefix "" { policy = "read" }`) + require.NoError(t, err) + + // Register gateway and two service instances that will be associated with it + { + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "terminating-gateway", + Service: "terminating-gateway", + Kind: structs.ServiceKindTerminatingGateway, + Port: 443, + }, + Check: &structs.HealthCheck{ + Name: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway", + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var out struct{} + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + { + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "terminating-gateway2", + Service: "terminating-gateway2", + Kind: structs.ServiceKindTerminatingGateway, + Port: 444, + }, + Check: &structs.HealthCheck{ + Name: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway2", + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var out struct{} + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + } + + arg = structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.2", + Service: &structs.NodeService{ + ID: "db", + Service: "db", + }, + Check: &structs.HealthCheck{ + Name: "db-warning", + Status: api.HealthWarning, + ServiceID: "db", + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + + arg = structs.RegisterRequest{ + Datacenter: "dc1", + Node: "baz", + Address: "127.0.0.3", + Service: &structs.NodeService{ + ID: "api", + Service: "api", + }, + Check: &structs.HealthCheck{ + Name: "api-passing", + Status: api.HealthPassing, + ServiceID: "api", + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + } + + // Register terminating-gateway config entry, linking it to db and api + { + args := &structs.TerminatingGatewayConfigEntry{ + Name: "terminating-gateway", + Kind: structs.TerminatingGateway, + Services: []structs.LinkedService{ + {Name: "db"}, + {Name: "api"}, + }, + } + + req := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc1", + Entry: args, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var out bool + require.NoError(t, msgpackrpc.CallWithCodec(codec, "ConfigEntry.Apply", &req, &out)) + require.True(t, out) + } + + // Register terminating-gateway config entry, linking it to db and api + { + args := &structs.TerminatingGatewayConfigEntry{ + Name: "terminating-gateway2", + Kind: structs.TerminatingGateway, + Services: []structs.LinkedService{ + {Name: "db"}, + {Name: "api"}, + }, + } + + req := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc1", + Entry: args, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var out bool + require.NoError(t, msgpackrpc.CallWithCodec(codec, "ConfigEntry.Apply", &req, &out)) + require.True(t, out) + } + + var out structs.IndexedCheckServiceNodes + + // Not passing a token with service:read on Gateway leads to PermissionDenied + req := structs.ServiceSpecificRequest{ 
+ Datacenter: "dc1", + ServiceName: "db", + ServiceKind: structs.ServiceKindTerminatingGateway, + } + err = msgpackrpc.CallWithCodec(codec, "Internal.ServiceGateways", &req, &out) + require.Error(t, err, acl.ErrPermissionDenied) + + // Passing a token without service:read on api leads to it getting filtered out + req = structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "db", + ServiceKind: structs.ServiceKindTerminatingGateway, + QueryOptions: structs.QueryOptions{Token: token.SecretID}, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Internal.ServiceGateways", &req, &out)) + + nodes := out.Nodes + require.Len(t, nodes, 1) + require.Equal(t, "foo", nodes[0].Node.Node) + require.Equal(t, structs.ServiceKindTerminatingGateway, nodes[0].Service.Kind) + require.Equal(t, "terminating-gateway", nodes[0].Service.Service) + require.Equal(t, "terminating-gateway", nodes[0].Service.ID) + require.True(t, out.QueryMeta.ResultsFilteredByACLs, "ResultsFilteredByACLs should be true") +} + +func TestInternal_ServiceGatewayService_Terminating_Destination(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Parallel() + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + testrpc.WaitForTestAgent(t, s1.RPC, "dc1") + + google := structs.NodeService{ + ID: "google", + Service: "google", + } + + // Register service-default with conflicting destination address + { + arg := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc1", + Entry: &structs.ServiceConfigEntry{ + Name: "google", + Destination: &structs.DestinationConfig{Addresses: []string{"www.google.com"}, Port: 443}, + EnterpriseMeta: *acl.DefaultEnterpriseMeta(), + }, + } + var configOutput bool + require.NoError(t, msgpackrpc.CallWithCodec(codec, "ConfigEntry.Apply", &arg, &configOutput)) + require.True(t, configOutput) + } + + // Register terminating-gateway config entry, linking it to google.com + { + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "terminating-gateway", + Service: "terminating-gateway", + Kind: structs.ServiceKindTerminatingGateway, + Port: 443, + }, + Check: &structs.HealthCheck{ + Name: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway", + }, + } + var out struct{} + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &arg, &out)) + } + { + args := &structs.TerminatingGatewayConfigEntry{ + Name: "terminating-gateway", + Kind: structs.TerminatingGateway, + Services: []structs.LinkedService{ + { + Name: "google", + }, + }, + } + + req := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc1", + Entry: args, + } + var configOutput bool + require.NoError(t, msgpackrpc.CallWithCodec(codec, "ConfigEntry.Apply", &req, &configOutput)) + require.True(t, configOutput) + } + + var out structs.IndexedCheckServiceNodes + req := structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "google", + ServiceKind: structs.ServiceKindTerminatingGateway, + } + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Internal.ServiceGateways", &req, &out)) + + nodes := out.Nodes + + for _, n := range nodes { + n.Node.RaftIndex = structs.RaftIndex{} + n.Service.RaftIndex = structs.RaftIndex{} + for _, m := range n.Checks { + m.RaftIndex = structs.RaftIndex{} + } + } + + expect := structs.CheckServiceNodes{ + 
structs.CheckServiceNode{ + Node: &structs.Node{ + Node: "foo", + RaftIndex: structs.RaftIndex{}, + Address: "127.0.0.1", + Datacenter: "dc1", + Partition: acl.DefaultPartitionName, + }, + Service: &structs.NodeService{ + Kind: structs.ServiceKindTerminatingGateway, + ID: "terminating-gateway", + Service: "terminating-gateway", + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + Tags: []string{}, + Meta: map[string]string{}, + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + TaggedAddresses: map[string]structs.ServiceAddress{ + "consul-virtual:" + google.CompoundServiceName().String(): {Address: "240.0.0.1"}, + }, + RaftIndex: structs.RaftIndex{}, + Address: "", + }, + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + Name: "terminating connect", + Node: "foo", + CheckID: "terminating connect", + Status: api.HealthPassing, + ServiceID: "terminating-gateway", + ServiceName: "terminating-gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + }, + }, + }, + } + + assert.Len(t, nodes, 1) + assert.Equal(t, expect, nodes) +} diff --git a/agent/consul/leader.go b/agent/consul/leader.go index eb197deb3..389b79056 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -315,7 +315,9 @@ func (s *Server) establishLeadership(ctx context.Context) error { s.startFederationStateAntiEntropy(ctx) - s.startPeeringStreamSync(ctx) + if s.config.PeeringEnabled { + s.startPeeringStreamSync(ctx) + } s.startDeferredDeletion(ctx) @@ -758,7 +760,9 @@ func (s *Server) stopACLReplication() { } func (s *Server) startDeferredDeletion(ctx context.Context) { - s.startPeeringDeferredDeletion(ctx) + if s.config.PeeringEnabled { + s.startPeeringDeferredDeletion(ctx) + } s.startTenancyDeferredDeletion(ctx) } diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 5e90de6b0..d9b386386 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -36,7 +36,7 @@ func TestConnectCA_ConfigurationSet_ChangeKeyConfig_Primary(t *testing.T) { keyBits int }{ {connect.DefaultPrivateKeyType, connect.DefaultPrivateKeyBits}, - {"ec", 256}, + // {"ec", 256}, skip since values are same as Defaults {"ec", 384}, {"rsa", 2048}, {"rsa", 4096}, @@ -55,7 +55,7 @@ func TestConnectCA_ConfigurationSet_ChangeKeyConfig_Primary(t *testing.T) { providerState := map[string]string{"foo": "dc1-value"} // Initialize primary as the primary DC - dir1, srv := testServerWithConfig(t, func(c *Config) { + _, srv := testServerWithConfig(t, func(c *Config) { c.Datacenter = "dc1" c.PrimaryDatacenter = "dc1" c.Build = "1.6.0" @@ -63,12 +63,9 @@ func TestConnectCA_ConfigurationSet_ChangeKeyConfig_Primary(t *testing.T) { c.CAConfig.Config["PrivateKeyBits"] = src.keyBits c.CAConfig.Config["test_state"] = providerState }) - defer os.RemoveAll(dir1) - defer srv.Shutdown() codec := rpcClient(t, srv) - defer codec.Close() - testrpc.WaitForLeader(t, srv.RPC, "dc1") + waitForLeaderEstablishment(t, srv) testrpc.WaitForActiveCARoot(t, srv.RPC, "dc1", nil) var ( diff --git a/agent/consul/leader_peering.go b/agent/consul/leader_peering.go index 49369bbf7..3288a141a 100644 --- a/agent/consul/leader_peering.go +++ b/agent/consul/leader_peering.go @@ -6,7 +6,10 @@ import ( "crypto/tls" "crypto/x509" "fmt" + "time" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" "github.com/hashicorp/go-multierror" @@ -14,6 +17,7 @@ import ( 
"golang.org/x/time/rate" "google.golang.org/grpc" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" @@ -25,8 +29,72 @@ import ( "github.com/hashicorp/consul/proto/pbpeerstream" ) +var leaderExportedServicesCountKey = []string{"consul", "peering", "exported_services"} +var LeaderPeeringMetrics = []prometheus.GaugeDefinition{ + { + Name: leaderExportedServicesCountKey, + Help: "A gauge that tracks how many services are exported for the peering. " + + "The labels are \"peering\" and, for enterprise, \"partition\". " + + "We emit this metric every 9 seconds", + }, +} + func (s *Server) startPeeringStreamSync(ctx context.Context) { s.leaderRoutineManager.Start(ctx, peeringStreamsRoutineName, s.runPeeringSync) + s.leaderRoutineManager.Start(ctx, peeringStreamsMetricsRoutineName, s.runPeeringMetrics) +} + +func (s *Server) runPeeringMetrics(ctx context.Context) error { + ticker := time.NewTicker(s.config.MetricsReportingInterval) + defer ticker.Stop() + + logger := s.logger.Named(logging.PeeringMetrics) + defaultMetrics := metrics.Default + + for { + select { + case <-ctx.Done(): + logger.Info("stopping peering metrics") + + // "Zero-out" the metric on exit so that when prometheus scrapes this + // metric from a non-leader, it does not get a stale value. + metrics.SetGauge(leaderExportedServicesCountKey, float32(0)) + return nil + case <-ticker.C: + if err := s.emitPeeringMetricsOnce(logger, defaultMetrics()); err != nil { + s.logger.Error("error emitting peering stream metrics", "error", err) + } + } + } +} + +func (s *Server) emitPeeringMetricsOnce(logger hclog.Logger, metricsImpl *metrics.Metrics) error { + _, peers, err := s.fsm.State().PeeringList(nil, *structs.NodeEnterpriseMetaInPartition(structs.WildcardSpecifier)) + if err != nil { + return err + } + + for _, peer := range peers { + status, found := s.peerStreamServer.StreamStatus(peer.ID) + if !found { + logger.Trace("did not find status for", "peer_name", peer.Name) + continue + } + + esc := status.GetExportedServicesCount() + part := peer.Partition + labels := []metrics.Label{ + {Name: "peer_name", Value: peer.Name}, + {Name: "peer_id", Value: peer.ID}, + } + if part != "" { + labels = append(labels, metrics.Label{Name: "partition", Value: part}) + } + + metricsImpl.SetGaugeWithLabels(leaderExportedServicesCountKey, float32(esc), labels) + } + + return nil } func (s *Server) runPeeringSync(ctx context.Context) error { @@ -49,6 +117,7 @@ func (s *Server) runPeeringSync(ctx context.Context) error { func (s *Server) stopPeeringStreamSync() { // will be a no-op when not started s.leaderRoutineManager.Stop(peeringStreamsRoutineName) + s.leaderRoutineManager.Stop(peeringStreamsMetricsRoutineName) } // syncPeeringsAndBlock is a long-running goroutine that is responsible for watching @@ -225,6 +294,11 @@ func (s *Server) establishStream(ctx context.Context, logger hclog.Logger, peer retryCtx, cancel := context.WithCancel(ctx) cancelFns[peer.ID] = cancel + streamStatus, err := s.peerStreamTracker.Register(peer.ID) + if err != nil { + return fmt.Errorf("failed to register stream: %v", err) + } + // Establish a stream-specific retry so that retrying stream/conn errors isn't dependent on state store changes. go retryLoopBackoff(retryCtx, func() error { // Try a new address on each iteration by advancing the ring buffer on errors. 
@@ -238,8 +312,15 @@ func (s *Server) establishStream(ctx context.Context, logger hclog.Logger, peer logger.Trace("dialing peer", "addr", addr) conn, err := grpc.DialContext(retryCtx, addr, - grpc.WithBlock(), + // TODO(peering): use a grpc.WithStatsHandler here?) tlsOption, + // For keep alive parameters there is a larger comment in ClientConnPool.dial about that. + grpc.WithKeepaliveParams(keepalive.ClientParameters{ + Time: 30 * time.Second, + Timeout: 10 * time.Second, + // send keepalive pings even if there is no active streams + PermitWithoutStream: true, + }), ) if err != nil { return fmt.Errorf("failed to dial: %w", err) @@ -277,8 +358,7 @@ func (s *Server) establishStream(ctx context.Context, logger hclog.Logger, peer return err }, func(err error) { - // TODO(peering): These errors should be reported in the peer status, otherwise they're only in the logs. - // Lockable status isn't available here though. Could report it via the peering.Service? + streamStatus.TrackSendError(err.Error()) logger.Error("error managing peering stream", "peer_id", peer.ID, "error", err) }) diff --git a/agent/consul/leader_peering_test.go b/agent/consul/leader_peering_test.go index 222b59279..feaf5be02 100644 --- a/agent/consul/leader_peering_test.go +++ b/agent/consul/leader_peering_test.go @@ -4,9 +4,12 @@ import ( "context" "encoding/base64" "encoding/json" + "fmt" + "io/ioutil" "testing" "time" + "github.com/armon/go-metrics" "github.com/stretchr/testify/require" "google.golang.org/grpc" @@ -15,20 +18,34 @@ import ( "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/proto/pbpeering" + "github.com/hashicorp/consul/sdk/freeport" "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/consul/testrpc" + "github.com/hashicorp/consul/types" ) func TestLeader_PeeringSync_Lifecycle_ClientDeletion(t *testing.T) { + t.Run("without-tls", func(t *testing.T) { + testLeader_PeeringSync_Lifecycle_ClientDeletion(t, false) + }) + t.Run("with-tls", func(t *testing.T) { + testLeader_PeeringSync_Lifecycle_ClientDeletion(t, true) + }) +} +func testLeader_PeeringSync_Lifecycle_ClientDeletion(t *testing.T, enableTLS bool) { if testing.Short() { t.Skip("too slow for testing.Short") } - // TODO(peering): Configure with TLS _, s1 := testServerWithConfig(t, func(c *Config) { - c.NodeName = "s1.dc1" + c.NodeName = "bob" c.Datacenter = "dc1" c.TLSConfig.Domain = "consul" + if enableTLS { + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Bob.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Bob.key" + } }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -68,9 +85,14 @@ func TestLeader_PeeringSync_Lifecycle_ClientDeletion(t *testing.T) { // Bring up s2 and store s1's token so that it attempts to dial. 
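For reference, a minimal sketch of the keepalive dial options added above, using only the public gRPC client API. The target address is a placeholder, and plaintext credentials stand in for the per-peer TLS option that the real code builds from the stored peering:

```go
package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/keepalive"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	conn, err := grpc.DialContext(ctx, "localhost:8502", // placeholder address
		grpc.WithInsecure(), // illustration only; the peering stream uses per-peer TLS
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:                30 * time.Second, // ping when the connection is idle
			Timeout:             10 * time.Second, // give up if a ping is not acked in this window
			PermitWithoutStream: true,             // keep pinging even with no active streams
		}),
	)
	if err != nil {
		log.Fatalf("failed to dial: %v", err)
	}
	defer conn.Close()
}
```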
_, s2 := testServerWithConfig(t, func(c *Config) { - c.NodeName = "s2.dc2" + c.NodeName = "betty" c.Datacenter = "dc2" c.PrimaryDatacenter = "dc2" + if enableTLS { + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Betty.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Betty.key" + } }) testrpc.WaitForLeader(t, s2.RPC, "dc2") @@ -120,15 +142,27 @@ func TestLeader_PeeringSync_Lifecycle_ClientDeletion(t *testing.T) { } func TestLeader_PeeringSync_Lifecycle_ServerDeletion(t *testing.T) { + t.Run("without-tls", func(t *testing.T) { + testLeader_PeeringSync_Lifecycle_ServerDeletion(t, false) + }) + t.Run("with-tls", func(t *testing.T) { + testLeader_PeeringSync_Lifecycle_ServerDeletion(t, true) + }) +} +func testLeader_PeeringSync_Lifecycle_ServerDeletion(t *testing.T, enableTLS bool) { if testing.Short() { t.Skip("too slow for testing.Short") } - // TODO(peering): Configure with TLS _, s1 := testServerWithConfig(t, func(c *Config) { - c.NodeName = "s1.dc1" + c.NodeName = "bob" c.Datacenter = "dc1" c.TLSConfig.Domain = "consul" + if enableTLS { + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Bob.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Bob.key" + } }) testrpc.WaitForLeader(t, s1.RPC, "dc1") @@ -164,9 +198,14 @@ func TestLeader_PeeringSync_Lifecycle_ServerDeletion(t *testing.T) { // Bring up s2 and store s1's token so that it attempts to dial. _, s2 := testServerWithConfig(t, func(c *Config) { - c.NodeName = "s2.dc2" + c.NodeName = "betty" c.Datacenter = "dc2" c.PrimaryDatacenter = "dc2" + if enableTLS { + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Betty.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Betty.key" + } }) testrpc.WaitForLeader(t, s2.RPC, "dc2") @@ -215,6 +254,111 @@ func TestLeader_PeeringSync_Lifecycle_ServerDeletion(t *testing.T) { }) } +func TestLeader_PeeringSync_FailsForTLSError(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Run("server-name-validation", func(t *testing.T) { + testLeader_PeeringSync_failsForTLSError(t, func(p *pbpeering.Peering) { + p.PeerServerName = "wrong.name" + }, `transport: authentication handshake failed: x509: certificate is valid for server.dc1.consul, bob.server.dc1.consul, not wrong.name`) + }) + t.Run("bad-ca-roots", func(t *testing.T) { + wrongRoot, err := ioutil.ReadFile("../../test/client_certs/rootca.crt") + require.NoError(t, err) + + testLeader_PeeringSync_failsForTLSError(t, func(p *pbpeering.Peering) { + p.PeerCAPems = []string{string(wrongRoot)} + }, `transport: authentication handshake failed: x509: certificate signed by unknown authority`) + }) +} + +func testLeader_PeeringSync_failsForTLSError(t *testing.T, peerMutateFn func(p *pbpeering.Peering), expectErr string) { + require.NotNil(t, peerMutateFn) + + _, s1 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "bob" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Bob.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Bob.key" + }) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // Create a peering by generating a token + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + t.Cleanup(cancel) + + conn, err := grpc.DialContext(ctx, s1.config.RPCAddr.String(), + 
grpc.WithContextDialer(newServerDialer(s1.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn.Close() + + peeringClient := pbpeering.NewPeeringServiceClient(conn) + + req := pbpeering.GenerateTokenRequest{ + PeerName: "my-peer-s2", + } + resp, err := peeringClient.GenerateToken(ctx, &req) + require.NoError(t, err) + + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + // S1 should not have a stream tracked for dc2 because s1 generated a token + // for baz, and therefore needs to wait to be dialed. + time.Sleep(1 * time.Second) + _, found := s1.peerStreamServer.StreamStatus(token.PeerID) + require.False(t, found) + + var ( + s2PeerID = "cc56f0b8-3885-4e78-8d7b-614a0c45712d" + ) + + // Bring up s2 and store s1's token so that it attempts to dial. + _, s2 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "betty" + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc2" + + c.TLSConfig.GRPC.CAFile = "../../test/hostname/CertAuth.crt" + c.TLSConfig.GRPC.CertFile = "../../test/hostname/Betty.crt" + c.TLSConfig.GRPC.KeyFile = "../../test/hostname/Betty.key" + }) + testrpc.WaitForLeader(t, s2.RPC, "dc2") + + // Simulate a peering initiation event by writing a peering with data from a peering token. + // Eventually the leader in dc2 should dial and connect to the leader in dc1. + p := &pbpeering.Peering{ + ID: s2PeerID, + Name: "my-peer-s1", + PeerID: token.PeerID, + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + peerMutateFn(p) + require.True(t, p.ShouldDial()) + + // We maintain a pointer to the peering on the write so that we can get the ID without needing to re-query the state store. + require.NoError(t, s2.fsm.State().PeeringWrite(1000, p)) + + retry.Run(t, func(r *retry.R) { + status, found := s2.peerStreamTracker.StreamStatus(p.ID) + require.True(r, found) + require.False(r, status.Connected) + require.Contains(r, status.LastSendErrorMessage, expectErr) + }) +} + func TestLeader_Peering_DeferredDeletion(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") @@ -282,6 +426,120 @@ func TestLeader_Peering_DeferredDeletion(t *testing.T) { }) } +// Test that the dialing peer attempts to reestablish connections when the accepting peer +// shuts down without sending a Terminated message. +// +// To test this, we start the two peer servers (accepting and dialing), set up peering, and then shut down +// the accepting peer. This terminates the connection without sending a Terminated message. +// We then restart the accepting peer (we actually spin up a new server with the same config and port) and then +// assert that the dialing peer reestablishes the connection. +func TestLeader_Peering_DialerReestablishesConnectionOnError(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + // Reserve a gRPC port so we can restart the accepting server with the same port. + ports := freeport.GetN(t, 1) + acceptingServerPort := ports[0] + + _, acceptingServer := testServerWithConfig(t, func(c *Config) { + c.NodeName = "acceptingServer.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + c.GRPCPort = acceptingServerPort + }) + testrpc.WaitForLeader(t, acceptingServer.RPC, "dc1") + + // Create a peering by generating a token. 
+ ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + t.Cleanup(cancel) + + conn, err := grpc.DialContext(ctx, acceptingServer.config.RPCAddr.String(), + grpc.WithContextDialer(newServerDialer(acceptingServer.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn.Close() + + peeringClient := pbpeering.NewPeeringServiceClient(conn) + req := pbpeering.GenerateTokenRequest{ + PeerName: "my-peer-dialing-server", + } + resp, err := peeringClient.GenerateToken(ctx, &req) + require.NoError(t, err) + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + var ( + dialingServerPeerID = token.PeerID + acceptingServerPeerID = "cc56f0b8-3885-4e78-8d7b-614a0c45712d" + ) + + // Bring up dialingServer and store acceptingServer's token so that it attempts to dial. + _, dialingServer := testServerWithConfig(t, func(c *Config) { + c.NodeName = "dialing-server.dc2" + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc2" + }) + testrpc.WaitForLeader(t, dialingServer.RPC, "dc2") + p := &pbpeering.Peering{ + ID: acceptingServerPeerID, + Name: "my-peer-accepting-server", + PeerID: token.PeerID, + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + require.True(t, p.ShouldDial()) + require.NoError(t, dialingServer.fsm.State().PeeringWrite(1000, p)) + + // Wait for the stream to be connected. + retry.Run(t, func(r *retry.R) { + status, found := dialingServer.peerStreamServer.StreamStatus(p.ID) + require.True(r, found) + require.True(r, status.Connected) + }) + + // Wait until the dialing server has sent its roots over. This avoids a race condition where the accepting server + // shuts down, but the dialing server is still sending messages to the stream. When this happens, an error is raised + // which causes the stream to restart. + // In this test, we want to test what happens when the stream is closed when there are _no_ messages being sent. + retry.Run(t, func(r *retry.R) { + _, bundle, err := acceptingServer.fsm.State().PeeringTrustBundleRead(nil, state.Query{Value: "my-peer-dialing-server"}) + require.NoError(r, err) + require.NotNil(r, bundle) + }) + + // Shutdown the accepting server. + require.NoError(t, acceptingServer.Shutdown()) + // Have to manually shut down the gRPC server otherwise it stays bound to the port. + acceptingServer.externalGRPCServer.Stop() + + // Mimic the server restarting by starting a new server with the same config. + _, acceptingServerRestart := testServerWithConfig(t, func(c *Config) { + c.NodeName = "acceptingServer.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + c.GRPCPort = acceptingServerPort + }) + testrpc.WaitForLeader(t, acceptingServerRestart.RPC, "dc1") + + // Re-insert the peering state. + require.NoError(t, acceptingServerRestart.fsm.State().PeeringWrite(2000, &pbpeering.Peering{ + ID: dialingServerPeerID, + Name: "my-peer-dialing-server", + State: pbpeering.PeeringState_PENDING, + })) + + // The dialing peer should eventually reconnect. 
+ retry.Run(t, func(r *retry.R) { + connStreams := acceptingServerRestart.peerStreamServer.ConnectedStreams() + require.Contains(r, connStreams, dialingServerPeerID) + }) +} + func insertTestPeeringData(t *testing.T, store *state.Store, peer string, lastIdx uint64) uint64 { lastIdx++ require.NoError(t, store.PeeringTrustBundleWrite(lastIdx, &pbpeering.PeeringTrustBundle{ @@ -309,11 +567,6 @@ func insertTestPeeringData(t *testing.T, store *state.Store, peer string, lastId Node: "aaa", PeerName: peer, }, - { - CheckID: structs.SerfCheckID, - Node: "aaa", - PeerName: peer, - }, }, })) @@ -336,11 +589,6 @@ func insertTestPeeringData(t *testing.T, store *state.Store, peer string, lastId Node: "bbb", PeerName: peer, }, - { - CheckID: structs.SerfCheckID, - Node: "bbb", - PeerName: peer, - }, }, })) @@ -363,13 +611,514 @@ func insertTestPeeringData(t *testing.T, store *state.Store, peer string, lastId Node: "ccc", PeerName: peer, }, - { - CheckID: structs.SerfCheckID, - Node: "ccc", - PeerName: peer, - }, }, })) return lastIdx } + +// TODO(peering): once we move away from keeping state in stream tracker only on leaders, move this test to consul/server_test maybe +func TestLeader_Peering_ImportedExportedServicesCount(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + // TODO(peering): Configure with TLS + _, s1 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s1.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + }) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // Create a peering by generating a token + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + t.Cleanup(cancel) + + conn, err := grpc.DialContext(ctx, s1.config.RPCAddr.String(), + grpc.WithContextDialer(newServerDialer(s1.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn.Close() + + peeringClient := pbpeering.NewPeeringServiceClient(conn) + + req := pbpeering.GenerateTokenRequest{ + PeerName: "my-peer-s2", + } + resp, err := peeringClient.GenerateToken(ctx, &req) + require.NoError(t, err) + + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + var ( + s2PeerID = "cc56f0b8-3885-4e78-8d7b-614a0c45712d" + lastIdx = uint64(0) + ) + + // Bring up s2 and store s1's token so that it attempts to dial. + _, s2 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s2.dc2" + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc2" + }) + testrpc.WaitForLeader(t, s2.RPC, "dc2") + + // Simulate a peering initiation event by writing a peering with data from a peering token. + // Eventually the leader in dc2 should dial and connect to the leader in dc1. 
+ p := &pbpeering.Peering{ + ID: s2PeerID, + Name: "my-peer-s1", + PeerID: token.PeerID, + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + require.True(t, p.ShouldDial()) + + lastIdx++ + require.NoError(t, s2.fsm.State().PeeringWrite(lastIdx, p)) + + /// add services to S1 to be synced to S2 + lastIdx++ + require.NoError(t, s1.FSM().State().EnsureRegistration(lastIdx, &structs.RegisterRequest{ + ID: types.NodeID(generateUUID()), + Node: "aaa", + Address: "10.0.0.1", + Service: &structs.NodeService{ + Service: "a-service", + ID: "a-service-1", + Port: 8080, + }, + Checks: structs.HealthChecks{ + { + CheckID: "a-service-1-check", + ServiceName: "a-service", + ServiceID: "a-service-1", + Node: "aaa", + }, + }, + })) + + lastIdx++ + require.NoError(t, s1.FSM().State().EnsureRegistration(lastIdx, &structs.RegisterRequest{ + ID: types.NodeID(generateUUID()), + + Node: "bbb", + Address: "10.0.0.2", + Service: &structs.NodeService{ + Service: "b-service", + ID: "b-service-1", + Port: 8080, + }, + Checks: structs.HealthChecks{ + { + CheckID: "b-service-1-check", + ServiceName: "b-service", + ServiceID: "b-service-1", + Node: "bbb", + }, + }, + })) + + lastIdx++ + require.NoError(t, s1.FSM().State().EnsureRegistration(lastIdx, &structs.RegisterRequest{ + ID: types.NodeID(generateUUID()), + + Node: "ccc", + Address: "10.0.0.3", + Service: &structs.NodeService{ + Service: "c-service", + ID: "c-service-1", + Port: 8080, + }, + Checks: structs.HealthChecks{ + { + CheckID: "c-service-1-check", + ServiceName: "c-service", + ServiceID: "c-service-1", + Node: "ccc", + }, + }, + })) + /// finished adding services + + type testCase struct { + name string + description string + exportedService structs.ExportedServicesConfigEntry + expectedImportedServsCount uint64 + expectedExportedServsCount uint64 + } + + testCases := []testCase{ + { + name: "wildcard", + description: "for a wildcard exported services, we want to see all services synced", + exportedService: structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: structs.WildcardSpecifier, + Consumers: []structs.ServiceConsumer{ + { + PeerName: "my-peer-s2", + }, + }, + }, + }, + }, + expectedImportedServsCount: 4, // 3 services from above + the "consul" service + expectedExportedServsCount: 4, // 3 services from above + the "consul" service + }, + { + name: "no sync", + description: "update the config entry to allow no service sync", + exportedService: structs.ExportedServicesConfigEntry{ + Name: "default", + }, + expectedImportedServsCount: 0, // we want to see this decremented from 4 --> 0 + expectedExportedServsCount: 0, // we want to see this decremented from 4 --> 0 + }, + { + name: "just a, b services", + description: "export just two services", + exportedService: structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "a-service", + Consumers: []structs.ServiceConsumer{ + { + PeerName: "my-peer-s2", + }, + }, + }, + { + Name: "b-service", + Consumers: []structs.ServiceConsumer{ + { + PeerName: "my-peer-s2", + }, + }, + }, + }, + }, + expectedImportedServsCount: 2, + expectedExportedServsCount: 2, + }, + { + name: "unexport b service", + description: "by unexporting b we want to see the count decrement eventually", + exportedService: structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "a-service", + Consumers: []structs.ServiceConsumer{ 
+ { + PeerName: "my-peer-s2", + }, + }, + }, + }, + }, + expectedImportedServsCount: 1, + expectedExportedServsCount: 1, + }, + { + name: "export c service", + description: "now export the c service and expect the count to increment", + exportedService: structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "a-service", + Consumers: []structs.ServiceConsumer{ + { + PeerName: "my-peer-s2", + }, + }, + }, + { + Name: "c-service", + Consumers: []structs.ServiceConsumer{ + { + PeerName: "my-peer-s2", + }, + }, + }, + }, + }, + expectedImportedServsCount: 2, + expectedExportedServsCount: 2, + }, + } + + conn2, err := grpc.DialContext(ctx, s2.config.RPCAddr.String(), + grpc.WithContextDialer(newServerDialer(s2.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn2.Close() + + peeringClient2 := pbpeering.NewPeeringServiceClient(conn2) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + lastIdx++ + require.NoError(t, s1.fsm.State().EnsureConfigEntry(lastIdx, &tc.exportedService)) + + // Check that imported services count on S2 are what we expect + retry.Run(t, func(r *retry.R) { + // on Read + resp, err := peeringClient2.PeeringRead(ctx, &pbpeering.PeeringReadRequest{Name: "my-peer-s1"}) + require.NoError(r, err) + require.NotNil(r, resp.Peering) + require.Equal(r, tc.expectedImportedServsCount, resp.Peering.ImportedServiceCount) + + // on List + resp2, err2 := peeringClient2.PeeringList(ctx, &pbpeering.PeeringListRequest{}) + require.NoError(r, err2) + require.NotEmpty(r, resp2.Peerings) + require.Equal(r, tc.expectedExportedServsCount, resp2.Peerings[0].ImportedServiceCount) + }) + + // Check that exported services count on S1 are what we expect + retry.Run(t, func(r *retry.R) { + // on Read + resp, err := peeringClient.PeeringRead(ctx, &pbpeering.PeeringReadRequest{Name: "my-peer-s2"}) + require.NoError(r, err) + require.NotNil(r, resp.Peering) + require.Equal(r, tc.expectedImportedServsCount, resp.Peering.ExportedServiceCount) + + // on List + resp2, err2 := peeringClient.PeeringList(ctx, &pbpeering.PeeringListRequest{}) + require.NoError(r, err2) + require.NotEmpty(r, resp2.Peerings) + require.Equal(r, tc.expectedExportedServsCount, resp2.Peerings[0].ExportedServiceCount) + }) + }) + } +} + +// TODO(peering): once we move away from keeping state in stream tracker only on leaders, move this test to consul/server_test maybe +func TestLeader_PeeringMetrics_emitPeeringMetrics(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + var ( + s2PeerID1 = generateUUID() + s2PeerID2 = generateUUID() + testContextTimeout = 60 * time.Second + lastIdx = uint64(0) + ) + + // TODO(peering): Configure with TLS + _, s1 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s1.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + }) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // Create a peering by generating a token + ctx, cancel := context.WithTimeout(context.Background(), testContextTimeout) + t.Cleanup(cancel) + + conn, err := grpc.DialContext(ctx, s1.config.RPCAddr.String(), + grpc.WithContextDialer(newServerDialer(s1.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn.Close() + + peeringClient := pbpeering.NewPeeringServiceClient(conn) + + req := pbpeering.GenerateTokenRequest{ + PeerName: "my-peer-s2", + } + resp, err := peeringClient.GenerateToken(ctx, &req) + 
require.NoError(t, err) + + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + // Bring up s2 and store s1's token so that it attempts to dial. + _, s2 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s2.dc2" + c.Datacenter = "dc2" + c.PrimaryDatacenter = "dc2" + }) + testrpc.WaitForLeader(t, s2.RPC, "dc2") + + // Simulate exporting services in the tracker + { + // Simulate a peering initiation event by writing a peering with data from a peering token. + // Eventually the leader in dc2 should dial and connect to the leader in dc1. + p := &pbpeering.Peering{ + ID: s2PeerID1, + Name: "my-peer-s1", + PeerID: token.PeerID, + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + require.True(t, p.ShouldDial()) + lastIdx++ + require.NoError(t, s2.fsm.State().PeeringWrite(lastIdx, p)) + + p2 := &pbpeering.Peering{ + ID: s2PeerID2, + Name: "my-peer-s3", + PeerID: token.PeerID, // doesn't much matter what these values are + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + require.True(t, p2.ShouldDial()) + lastIdx++ + require.NoError(t, s2.fsm.State().PeeringWrite(lastIdx, p2)) + + // connect the stream + mst1, err := s2.peeringServer.Tracker.Connected(s2PeerID1) + require.NoError(t, err) + + // mimic tracking exported services + mst1.TrackExportedService(structs.ServiceName{Name: "a-service"}) + mst1.TrackExportedService(structs.ServiceName{Name: "b-service"}) + mst1.TrackExportedService(structs.ServiceName{Name: "c-service"}) + + // connect the stream + mst2, err := s2.peeringServer.Tracker.Connected(s2PeerID2) + require.NoError(t, err) + + // mimic tracking exported services + mst2.TrackExportedService(structs.ServiceName{Name: "d-service"}) + mst2.TrackExportedService(structs.ServiceName{Name: "e-service"}) + } + + // set up a metrics sink + sink := metrics.NewInmemSink(testContextTimeout, testContextTimeout) + cfg := metrics.DefaultConfig("us-west") + cfg.EnableHostname = false + met, err := metrics.New(cfg, sink) + require.NoError(t, err) + + errM := s2.emitPeeringMetricsOnce(s2.logger, met) + require.NoError(t, errM) + + retry.Run(t, func(r *retry.R) { + intervals := sink.Data() + require.Len(r, intervals, 1) + intv := intervals[0] + + // the keys for a Gauge value look like: {serviceName}.{prefix}.{key_name};{label=value};... + keyMetric1 := fmt.Sprintf("us-west.consul.peering.exported_services;peer_name=my-peer-s1;peer_id=%s", s2PeerID1) + metric1, ok := intv.Gauges[keyMetric1] + require.True(r, ok, fmt.Sprintf("did not find the key %q", keyMetric1)) + + require.Equal(r, float32(3), metric1.Value) // for a, b, c services + + keyMetric2 := fmt.Sprintf("us-west.consul.peering.exported_services;peer_name=my-peer-s3;peer_id=%s", s2PeerID2) + metric2, ok := intv.Gauges[keyMetric2] + require.True(r, ok, fmt.Sprintf("did not find the key %q", keyMetric2)) + + require.Equal(r, float32(2), metric2.Value) // for d, e services + }) +} + +// Test that the leader doesn't start its peering deletion routing when +// peering is disabled. 
+func TestLeader_Peering_NoDeletionWhenPeeringDisabled(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + _, s1 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s1.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + c.PeeringEnabled = false + }) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + var ( + peerID = "cc56f0b8-3885-4e78-8d7b-614a0c45712d" + peerName = "my-peer-s2" + lastIdx = uint64(0) + ) + + // Simulate a peering initiation event by writing a peering to the state store. + lastIdx++ + require.NoError(t, s1.fsm.State().PeeringWrite(lastIdx, &pbpeering.Peering{ + ID: peerID, + Name: peerName, + })) + + // Mark the peering for deletion to trigger the termination sequence. + lastIdx++ + require.NoError(t, s1.fsm.State().PeeringWrite(lastIdx, &pbpeering.Peering{ + ID: peerID, + Name: peerName, + DeletedAt: structs.TimeToProto(time.Now()), + })) + + // The leader routine shouldn't be running so the peering should never get deleted. + require.Never(t, func() bool { + _, peering, err := s1.fsm.State().PeeringRead(nil, state.Query{ + Value: peerName, + }) + if err != nil { + t.Logf("unexpected err: %s", err) + return true + } + if peering == nil { + return true + } + return false + }, 7*time.Second, 1*time.Second, "peering should not have been deleted") +} + +// Test that the leader doesn't start its peering establishment routine +// when peering is disabled. +func TestLeader_Peering_NoEstablishmentWhenPeeringDisabled(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + _, s1 := testServerWithConfig(t, func(c *Config) { + c.NodeName = "s1.dc1" + c.Datacenter = "dc1" + c.TLSConfig.Domain = "consul" + c.PeeringEnabled = false + }) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + var ( + peerID = "cc56f0b8-3885-4e78-8d7b-614a0c45712d" + peerName = "my-peer-s2" + lastIdx = uint64(0) + ) + + // Simulate a peering initiation event by writing a peering to the state store. + require.NoError(t, s1.fsm.State().PeeringWrite(lastIdx, &pbpeering.Peering{ + ID: peerID, + Name: peerName, + PeerServerAddresses: []string{"1.2.3.4"}, + })) + + require.Never(t, func() bool { + _, found := s1.peerStreamTracker.StreamStatus(peerID) + return found + }, 7*time.Second, 1*time.Second, "peering should not have been established") +} diff --git a/agent/consul/peering_backend.go b/agent/consul/peering_backend.go index 4014bbdd2..1ab1b5c95 100644 --- a/agent/consul/peering_backend.go +++ b/agent/consul/peering_backend.go @@ -7,6 +7,8 @@ import ( "strconv" "sync" + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/acl/resolver" "github.com/hashicorp/consul/agent/consul/stream" "github.com/hashicorp/consul/agent/grpc-external/services/peerstream" "github.com/hashicorp/consul/agent/rpc/peering" @@ -52,7 +54,7 @@ func (b *PeeringBackend) GetLeaderAddress() string { // GetAgentCACertificates gets the server's raw CA data from its TLS Configurator. func (b *PeeringBackend) GetAgentCACertificates() ([]string, error) { // TODO(peering): handle empty CA pems - return b.srv.tlsConfigurator.ManualCAPems(), nil + return b.srv.tlsConfigurator.GRPCManualCAPems(), nil } // GetServerAddresses looks up server node addresses from the state store. 
@@ -160,3 +162,7 @@ func (b *PeeringBackend) CatalogDeregister(req *structs.DeregisterRequest) error _, err := b.srv.leaderRaftApply("Catalog.Deregister", structs.DeregisterRequestType, req) return err } + +func (b *PeeringBackend) ResolveTokenAndDefaultMeta(token string, entMeta *acl.EnterpriseMeta, authzCtx *acl.AuthorizerContext) (resolver.Result, error) { + return b.srv.ResolveTokenAndDefaultMeta(token, entMeta, authzCtx) +} diff --git a/agent/consul/peering_backend_oss_test.go b/agent/consul/peering_backend_oss_test.go index 5996690ea..3c120d26f 100644 --- a/agent/consul/peering_backend_oss_test.go +++ b/agent/consul/peering_backend_oss_test.go @@ -42,8 +42,7 @@ func TestPeeringBackend_RejectsPartition(t *testing.T) { peeringClient := pbpeering.NewPeeringServiceClient(conn) req := pbpeering.GenerateTokenRequest{ - Datacenter: "dc1", - Partition: "test", + Partition: "test", } _, err = peeringClient.GenerateToken(ctx, &req) require.Error(t, err) @@ -77,9 +76,8 @@ func TestPeeringBackend_IgnoresDefaultPartition(t *testing.T) { peeringClient := pbpeering.NewPeeringServiceClient(conn) req := pbpeering.GenerateTokenRequest{ - Datacenter: "dc1", - PeerName: "my-peer", - Partition: "DeFaUlT", + PeerName: "my-peer", + Partition: "DeFaUlT", } _, err = peeringClient.GenerateToken(ctx, &req) require.NoError(t, err) diff --git a/agent/consul/peering_backend_test.go b/agent/consul/peering_backend_test.go index 6d6344a29..fc73ba53d 100644 --- a/agent/consul/peering_backend_test.go +++ b/agent/consul/peering_backend_test.go @@ -15,43 +15,6 @@ import ( "github.com/hashicorp/consul/testrpc" ) -func TestPeeringBackend_DoesNotForwardToDifferentDC(t *testing.T) { - if testing.Short() { - t.Skip("too slow for testing.Short") - } - - t.Parallel() - _, s1 := testServerDC(t, "dc1") - _, s2 := testServerDC(t, "dc2") - - joinWAN(t, s2, s1) - - testrpc.WaitForLeader(t, s1.RPC, "dc1") - testrpc.WaitForLeader(t, s2.RPC, "dc2") - - // make a grpc client to dial s2 directly - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - t.Cleanup(cancel) - - conn, err := gogrpc.DialContext(ctx, s2.config.RPCAddr.String(), - gogrpc.WithContextDialer(newServerDialer(s2.config.RPCAddr.String())), - gogrpc.WithInsecure(), - gogrpc.WithBlock()) - require.NoError(t, err) - t.Cleanup(func() { conn.Close() }) - - peeringClient := pbpeering.NewPeeringServiceClient(conn) - - // GenerateToken request should fail against dc1, because we are dialing dc2. The GenerateToken request should never be forwarded across datacenters. 
- req := pbpeering.GenerateTokenRequest{
- PeerName: "peer1-usw1",
- Datacenter: "dc1",
- }
- _, err = peeringClient.GenerateToken(ctx, &req)
- require.Error(t, err)
- require.Contains(t, err.Error(), "requests to generate peering tokens cannot be forwarded to remote datacenters")
-}
-
 func TestPeeringBackend_ForwardToLeader(t *testing.T) {
 t.Parallel()
@@ -86,8 +49,7 @@ func TestPeeringBackend_ForwardToLeader(t *testing.T) {
 testutil.RunStep(t, "forward a write", func(t *testing.T) {
 // Do the grpc Write call to server2
 req := pbpeering.GenerateTokenRequest{
- Datacenter: "dc1",
- PeerName: "foo",
+ PeerName: "foo",
 }
 _, err := peeringClient.GenerateToken(ctx, &req)
 require.NoError(t, err)
diff --git a/agent/consul/prepared_query/template_test.go b/agent/consul/prepared_query/template_test.go
index 05cbc17da..3fbf2d5af 100644
--- a/agent/consul/prepared_query/template_test.go
+++ b/agent/consul/prepared_query/template_test.go
@@ -22,7 +22,7 @@ var (
 },
 Service: structs.ServiceQuery{
 Service: "${name.full}",
- Failover: structs.QueryDatacenterOptions{
+ Failover: structs.QueryFailoverOptions{
 Datacenters: []string{
 "${name.full}",
 "${name.prefix}",
@@ -69,7 +69,7 @@ var (
 },
 Service: structs.ServiceQuery{
 Service: "${name.full}",
- Failover: structs.QueryDatacenterOptions{
+ Failover: structs.QueryFailoverOptions{
 Datacenters: []string{
 "dc1",
 "dc2",
diff --git a/agent/consul/prepared_query/walk_test.go b/agent/consul/prepared_query/walk_test.go
index e45aa3a1e..ad71e0fed 100644
--- a/agent/consul/prepared_query/walk_test.go
+++ b/agent/consul/prepared_query/walk_test.go
@@ -20,7 +20,7 @@ func TestWalk_ServiceQuery(t *testing.T) {
 service := &structs.ServiceQuery{
 Service: "the-service",
- Failover: structs.QueryDatacenterOptions{
+ Failover: structs.QueryFailoverOptions{
 Datacenters: []string{"dc1", "dc2"},
 },
 Near: "_agent",
diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go
index fc0642b6f..7215161f3 100644
--- a/agent/consul/prepared_query_endpoint.go
+++ b/agent/consul/prepared_query_endpoint.go
@@ -187,11 +187,16 @@ func parseService(svc *structs.ServiceQuery) error {
 return fmt.Errorf("Must provide a Service name to query")
 }
+ failover := svc.Failover
 // NearestN can be 0 which means "don't fail over by RTT".
- if svc.Failover.NearestN < 0 {
+ if failover.NearestN < 0 {
 return fmt.Errorf("Bad NearestN '%d', must be >= 0", svc.Failover.NearestN)
 }
+ if (failover.NearestN != 0 || len(failover.Datacenters) != 0) && len(failover.Targets) != 0 {
+ return fmt.Errorf("Targets cannot be populated with NearestN or Datacenters")
+ }
+
 // Make sure the metadata filters are valid
 if err := structs.ValidateNodeMetadata(svc.NodeMeta, true); err != nil {
 return err
@@ -462,7 +467,7 @@ func (p *PreparedQuery) Execute(args *structs.PreparedQueryExecuteRequest,
 // and bail out. Otherwise, we fail over and try remote DCs, as allowed
 // by the query setup.
 if len(reply.Nodes) == 0 {
- wrapper := &queryServerWrapper{p.srv}
+ wrapper := &queryServerWrapper{srv: p.srv, executeRemote: p.ExecuteRemote}
 if err := queryFailover(wrapper, query, args, reply); err != nil {
 return err
 }
@@ -565,8 +570,13 @@ func (p *PreparedQuery) execute(query *structs.PreparedQuery,
 reply.Nodes = nodes
 reply.DNS = query.DNS
- // Stamp the result for this datacenter.
- reply.Datacenter = p.srv.config.Datacenter
+ // Stamp the result with its datacenter or peer.
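+ // A result is attributed to exactly one source: queries that targeted a cluster peer
+ // clear the Datacenter field, otherwise the local datacenter is recorded.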
+ if peerName := query.Service.PeerName; peerName != "" { + reply.PeerName = peerName + reply.Datacenter = "" + } else { + reply.Datacenter = p.srv.config.Datacenter + } return nil } @@ -651,12 +661,24 @@ func serviceMetaFilter(filters map[string]string, nodes structs.CheckServiceNode type queryServer interface { GetLogger() hclog.Logger GetOtherDatacentersByDistance() ([]string, error) - ForwardDC(method, dc string, args interface{}, reply interface{}) error + GetLocalDC() string + ExecuteRemote(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error } // queryServerWrapper applies the queryServer interface to a Server. type queryServerWrapper struct { - srv *Server + srv *Server + executeRemote func(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error +} + +// GetLocalDC returns the name of the local datacenter. +func (q *queryServerWrapper) GetLocalDC() string { + return q.srv.config.Datacenter +} + +// ExecuteRemote calls ExecuteRemote on PreparedQuery. +func (q *queryServerWrapper) ExecuteRemote(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + return q.executeRemote(args, reply) } // GetLogger returns the server's logger. @@ -683,11 +705,6 @@ func (q *queryServerWrapper) GetOtherDatacentersByDistance() ([]string, error) { return result, nil } -// ForwardDC calls into the server's RPC forwarder. -func (q *queryServerWrapper) ForwardDC(method, dc string, args interface{}, reply interface{}) error { - return q.srv.forwardDC(method, dc, args, reply) -} - // queryFailover runs an algorithm to determine which DCs to try and then calls // them to try to locate alternative services. func queryFailover(q queryServer, query *structs.PreparedQuery, @@ -709,7 +726,7 @@ func queryFailover(q queryServer, query *structs.PreparedQuery, // Build a candidate list of DCs to try, starting with the nearest N // from RTTs. - var dcs []string + var targets []structs.QueryFailoverTarget index := make(map[string]struct{}) if query.Service.Failover.NearestN > 0 { for i, dc := range nearest { @@ -717,30 +734,36 @@ func queryFailover(q queryServer, query *structs.PreparedQuery, break } - dcs = append(dcs, dc) + targets = append(targets, structs.QueryFailoverTarget{Datacenter: dc}) index[dc] = struct{}{} } } // Then add any DCs explicitly listed that weren't selected above. - for _, dc := range query.Service.Failover.Datacenters { + for _, target := range query.Service.Failover.AsTargets() { // This will prevent a log of other log spammage if we do not // attempt to talk to datacenters we don't know about. - if _, ok := known[dc]; !ok { - q.GetLogger().Debug("Skipping unknown datacenter in prepared query", "datacenter", dc) - continue + if dc := target.Datacenter; dc != "" { + if _, ok := known[dc]; !ok { + q.GetLogger().Debug("Skipping unknown datacenter in prepared query", "datacenter", dc) + continue + } + + // This will make sure we don't re-try something that fails + // from the NearestN list. + if _, ok := index[dc]; !ok { + targets = append(targets, target) + } } - // This will make sure we don't re-try something that fails - // from the NearestN list. - if _, ok := index[dc]; !ok { - dcs = append(dcs, dc) + if target.PeerName != "" { + targets = append(targets, target) } } // Now try the selected DCs in priority order. failovers := 0 - for _, dc := range dcs { + for _, target := range targets { // This keeps track of how many iterations we actually run. 
failovers++ @@ -752,7 +775,15 @@ func queryFailover(q queryServer, query *structs.PreparedQuery, // through this slice across successive RPC calls. reply.Nodes = nil - // Note that we pass along the limit since it can be applied + // Reset PeerName because it may have been set by a previous failover + // target. + query.Service.PeerName = target.PeerName + dc := target.Datacenter + if target.PeerName != "" { + dc = q.GetLocalDC() + } + + // Note that we pass along the limit since may be applied // remotely to save bandwidth. We also pass along the consistency // mode information and token we were given, so that applies to // the remote query as well. @@ -763,9 +794,11 @@ func queryFailover(q queryServer, query *structs.PreparedQuery, QueryOptions: args.QueryOptions, Connect: args.Connect, } - if err := q.ForwardDC("PreparedQuery.ExecuteRemote", dc, remote, reply); err != nil { + + if err = q.ExecuteRemote(remote, reply); err != nil { q.GetLogger().Warn("Failed querying for service in datacenter", "service", query.Service.Service, + "peerName", query.Service.PeerName, "datacenter", dc, "error", err, ) diff --git a/agent/consul/prepared_query_endpoint_test.go b/agent/consul/prepared_query_endpoint_test.go index 30de90fb2..4965a2a0d 100644 --- a/agent/consul/prepared_query_endpoint_test.go +++ b/agent/consul/prepared_query_endpoint_test.go @@ -2,6 +2,9 @@ package consul import ( "bytes" + "context" + "encoding/base64" + "encoding/json" "fmt" "os" "reflect" @@ -14,6 +17,7 @@ import ( "github.com/hashicorp/serf/coordinate" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/grpc" msgpackrpc "github.com/hashicorp/consul-net-rpc/net-rpc-msgpackrpc" "github.com/hashicorp/consul-net-rpc/net/rpc" @@ -23,6 +27,7 @@ import ( "github.com/hashicorp/consul/agent/structs/aclfilter" tokenStore "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/api" + "github.com/hashicorp/consul/proto/pbpeering" "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/consul/testrpc" "github.com/hashicorp/consul/types" @@ -82,8 +87,25 @@ func TestPreparedQuery_Apply(t *testing.T) { t.Fatalf("bad: %v", err) } - // Fix that and make sure it propagates an error from the Raft apply. + // Fix that and ensure Targets and NearestN cannot be set at the same time. + query.Query.Service.Failover.NearestN = 1 + query.Query.Service.Failover.Targets = []structs.QueryFailoverTarget{{PeerName: "peer"}} + err = msgpackrpc.CallWithCodec(codec, "PreparedQuery.Apply", &query, &reply) + if err == nil || !strings.Contains(err.Error(), "Targets cannot be populated with") { + t.Fatalf("bad: %v", err) + } + + // Fix that and ensure Targets and Datacenters cannot be set at the same time. query.Query.Service.Failover.NearestN = 0 + query.Query.Service.Failover.Datacenters = []string{"dc2"} + query.Query.Service.Failover.Targets = []structs.QueryFailoverTarget{{PeerName: "peer"}} + err = msgpackrpc.CallWithCodec(codec, "PreparedQuery.Apply", &query, &reply) + if err == nil || !strings.Contains(err.Error(), "Targets cannot be populated with") { + t.Fatalf("bad: %v", err) + } + + // Fix that and make sure it propagates an error from the Raft apply. 
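+ // Clearing Targets leaves a valid failover config, so the invalid session below is the only error left to surface.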
+ query.Query.Service.Failover.Targets = nil query.Query.Session = "nope" err = msgpackrpc.CallWithCodec(codec, "PreparedQuery.Apply", &query, &reply) if err == nil || !strings.Contains(err.Error(), "invalid session") { @@ -1442,6 +1464,17 @@ func TestPreparedQuery_Execute(t *testing.T) { s2.tokens.UpdateReplicationToken("root", tokenStore.TokenSourceConfig) + dir3, s3 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc3" + c.PrimaryDatacenter = "dc3" + c.NodeName = "acceptingServer.dc3" + }) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + waitForLeaderEstablishment(t, s3) + codec3 := rpcClient(t, s3) + defer codec3.Close() + // Try to WAN join. joinWAN(t, s2, s1) retry.Run(t, func(r *retry.R) { @@ -1456,6 +1489,70 @@ func TestPreparedQuery_Execute(t *testing.T) { // check for RPC forwarding testrpc.WaitForLeader(t, s1.RPC, "dc1", testrpc.WithToken("root")) testrpc.WaitForLeader(t, s1.RPC, "dc2", testrpc.WithToken("root")) + testrpc.WaitForLeader(t, s3.RPC, "dc3") + + acceptingPeerName := "my-peer-accepting-server" + dialingPeerName := "my-peer-dialing-server" + + // Set up peering between dc1 (dailing) and dc3 (accepting) and export the foo service + { + // Create a peering by generating a token. + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + t.Cleanup(cancel) + + conn, err := grpc.DialContext(ctx, s3.config.RPCAddr.String(), + grpc.WithContextDialer(newServerDialer(s3.config.RPCAddr.String())), + grpc.WithInsecure(), + grpc.WithBlock()) + require.NoError(t, err) + defer conn.Close() + + peeringClient := pbpeering.NewPeeringServiceClient(conn) + req := pbpeering.GenerateTokenRequest{ + PeerName: dialingPeerName, + } + resp, err := peeringClient.GenerateToken(ctx, &req) + require.NoError(t, err) + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + p := &pbpeering.Peering{ + ID: "cc56f0b8-3885-4e78-8d7b-614a0c45712d", + Name: acceptingPeerName, + PeerID: token.PeerID, + PeerCAPems: token.CA, + PeerServerName: token.ServerName, + PeerServerAddresses: token.ServerAddresses, + } + require.True(t, p.ShouldDial()) + require.NoError(t, s1.fsm.State().PeeringWrite(1000, p)) + + // Wait for the stream to be connected. + retry.Run(t, func(r *retry.R) { + status, found := s1.peerStreamServer.StreamStatus(p.ID) + require.True(r, found) + require.True(r, status.Connected) + }) + + exportedServices := structs.ConfigEntryRequest{ + Op: structs.ConfigEntryUpsert, + Datacenter: "dc3", + Entry: &structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "foo", + Consumers: []structs.ServiceConsumer{{PeerName: dialingPeerName}}, + }, + }, + }, + } + var configOutput bool + require.NoError(t, msgpackrpc.CallWithCodec(codec3, "ConfigEntry.Apply", &exportedServices, &configOutput)) + require.True(t, configOutput) + } execNoNodesToken := createTokenWithPolicyName(t, codec1, "no-nodes", `service_prefix "foo" { policy = "read" }`, "root") rules := ` @@ -1485,9 +1582,16 @@ func TestPreparedQuery_Execute(t *testing.T) { // Set up some nodes in each DC that host the service. 
{ for i := 0; i < 10; i++ { - for _, dc := range []string{"dc1", "dc2"} { + for _, d := range []struct { + codec rpc.ClientCodec + dc string + }{ + {codec1, "dc1"}, + {codec2, "dc2"}, + {codec3, "dc3"}, + } { req := structs.RegisterRequest{ - Datacenter: dc, + Datacenter: d.dc, Node: fmt.Sprintf("node%d", i+1), Address: fmt.Sprintf("127.0.0.%d", i+1), NodeMeta: map[string]string{ @@ -1497,7 +1601,7 @@ func TestPreparedQuery_Execute(t *testing.T) { Service: &structs.NodeService{ Service: "foo", Port: 8000, - Tags: []string{dc, fmt.Sprintf("tag%d", i+1)}, + Tags: []string{d.dc, fmt.Sprintf("tag%d", i+1)}, Meta: map[string]string{ "svc-group": fmt.Sprintf("%d", i%2), "foo": "true", @@ -1510,15 +1614,8 @@ func TestPreparedQuery_Execute(t *testing.T) { req.Service.Meta["unique"] = "true" } - var codec rpc.ClientCodec - if dc == "dc1" { - codec = codec1 - } else { - codec = codec2 - } - var reply struct{} - if err := msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply); err != nil { + if err := msgpackrpc.CallWithCodec(d.codec, "Catalog.Register", &req, &reply); err != nil { t.Fatalf("err: %v", err) } } @@ -1576,6 +1673,17 @@ func TestPreparedQuery_Execute(t *testing.T) { assert.True(t, reply.QueryMeta.KnownLeader) } + expectFailoverPeerNodes := func(t *testing.T, query *structs.PreparedQueryRequest, reply *structs.PreparedQueryExecuteResponse, n int) { + t.Helper() + assert.Len(t, reply.Nodes, n) + assert.Equal(t, "", reply.Datacenter) + assert.Equal(t, acceptingPeerName, reply.PeerName) + assert.Equal(t, 2, reply.Failovers) + assert.Equal(t, query.Query.Service.Service, reply.Service) + assert.Equal(t, query.Query.DNS, reply.DNS) + assert.True(t, reply.QueryMeta.KnownLeader) + } + t.Run("run the registered query", func(t *testing.T) { req := structs.PreparedQueryExecuteRequest{ Datacenter: "dc1", @@ -1962,10 +2070,10 @@ func TestPreparedQuery_Execute(t *testing.T) { require.NoError(t, msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID)) // Update the health of a node to mark it critical. - setHealth := func(t *testing.T, node string, health string) { + setHealth := func(t *testing.T, codec rpc.ClientCodec, dc string, node string, health string) { t.Helper() req := structs.RegisterRequest{ - Datacenter: "dc1", + Datacenter: dc, Node: node, Address: "127.0.0.1", Service: &structs.NodeService{ @@ -1981,9 +2089,9 @@ func TestPreparedQuery_Execute(t *testing.T) { WriteRequest: structs.WriteRequest{Token: "root"}, } var reply struct{} - require.NoError(t, msgpackrpc.CallWithCodec(codec1, "Catalog.Register", &req, &reply)) + require.NoError(t, msgpackrpc.CallWithCodec(codec, "Catalog.Register", &req, &reply)) } - setHealth(t, "node1", api.HealthCritical) + setHealth(t, codec1, "dc1", "node1", api.HealthCritical) // The failing node should be filtered. t.Run("failing node filtered", func(t *testing.T) { @@ -2003,7 +2111,7 @@ func TestPreparedQuery_Execute(t *testing.T) { }) // Upgrade it to a warning and re-query, should be 10 nodes again. - setHealth(t, "node1", api.HealthWarning) + setHealth(t, codec1, "dc1", "node1", api.HealthWarning) t.Run("warning nodes are included", func(t *testing.T) { req := structs.PreparedQueryExecuteRequest{ Datacenter: "dc1", @@ -2173,7 +2281,7 @@ func TestPreparedQuery_Execute(t *testing.T) { // Now fail everything in dc1 and we should get an empty list back. 
for i := 0; i < 10; i++ {
- setHealth(t, fmt.Sprintf("node%d", i+1), api.HealthCritical)
+ setHealth(t, codec1, "dc1", fmt.Sprintf("node%d", i+1), api.HealthCritical)
 }
 t.Run("everything is failing so should get empty list", func(t *testing.T) {
 req := structs.PreparedQueryExecuteRequest{
@@ -2308,6 +2416,61 @@ func TestPreparedQuery_Execute(t *testing.T) {
 assert.NotEqual(t, "node3", node.Node.Node)
 }
 })
+
+ // Modify the query to have it fail over to dc2 and then to the accepting cluster peer.
+ query.Query.Service.Failover = structs.QueryFailoverOptions{
+ Targets: []structs.QueryFailoverTarget{
+ {Datacenter: "dc2"},
+ {PeerName: acceptingPeerName},
+ },
+ }
+ require.NoError(t, msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID))
+
+ // Ensure the foo service has fully replicated.
+ retry.Run(t, func(r *retry.R) {
+ _, nodes, err := s1.fsm.State().CheckServiceNodes(nil, "foo", nil, acceptingPeerName)
+ require.NoError(r, err)
+ require.Len(r, nodes, 10)
+ })
+
+ // Now we should see 9 nodes from dc2
+ t.Run("failing over to dc2", func(t *testing.T) {
+ req := structs.PreparedQueryExecuteRequest{
+ Datacenter: "dc1",
+ QueryIDOrName: query.Query.ID,
+ QueryOptions: structs.QueryOptions{Token: execToken},
+ }
+
+ var reply structs.PreparedQueryExecuteResponse
+ require.NoError(t, msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Execute", &req, &reply))
+
+ for _, node := range reply.Nodes {
+ assert.NotEqual(t, "node3", node.Node.Node)
+ }
+ expectFailoverNodes(t, &query, &reply, 9)
+ })
+
+ // Set all checks in dc2 as critical
+ for i := 0; i < 10; i++ {
+ setHealth(t, codec2, "dc2", fmt.Sprintf("node%d", i+1), api.HealthCritical)
+ }
+
+ // Now we should see 9 nodes from dc3 via the cluster peer (the tag filter still applies)
+ t.Run("failing over to cluster peers", func(t *testing.T) {
+ req := structs.PreparedQueryExecuteRequest{
+ Datacenter: "dc1",
+ QueryIDOrName: query.Query.ID,
+ QueryOptions: structs.QueryOptions{Token: execToken},
+ }
+
+ var reply structs.PreparedQueryExecuteResponse
+ require.NoError(t, msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Execute", &req, &reply))
+
+ for _, node := range reply.Nodes {
+ assert.NotEqual(t, "node3", node.Node.Node)
+ }
+ expectFailoverPeerNodes(t, &query, &reply, 9)
+ })
 }
 
 func TestPreparedQuery_Execute_ForwardLeader(t *testing.T) {
@@ -2724,7 +2887,9 @@ func TestPreparedQuery_Wrapper(t *testing.T) {
 joinWAN(t, s2, s1)
 
 // Try all the operations on a real server via the wrapper.
- wrapper := &queryServerWrapper{s1} + wrapper := &queryServerWrapper{srv: s1, executeRemote: func(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + return nil + }} wrapper.GetLogger().Debug("Test") ret, err := wrapper.GetOtherDatacentersByDistance() @@ -2746,7 +2911,7 @@ type mockQueryServer struct { Datacenters []string DatacentersError error QueryLog []string - QueryFn func(dc string, args interface{}, reply interface{}) error + QueryFn func(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error Logger hclog.Logger LogBuffer *bytes.Buffer } @@ -2768,17 +2933,27 @@ func (m *mockQueryServer) GetLogger() hclog.Logger { return m.Logger } +func (m *mockQueryServer) GetLocalDC() string { + return "dc1" +} + func (m *mockQueryServer) GetOtherDatacentersByDistance() ([]string, error) { return m.Datacenters, m.DatacentersError } -func (m *mockQueryServer) ForwardDC(method, dc string, args interface{}, reply interface{}) error { - m.QueryLog = append(m.QueryLog, fmt.Sprintf("%s:%s", dc, method)) - if ret, ok := reply.(*structs.PreparedQueryExecuteResponse); ok { - ret.Datacenter = dc +func (m *mockQueryServer) ExecuteRemote(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + peerName := args.Query.Service.PeerName + dc := args.Datacenter + if peerName != "" { + m.QueryLog = append(m.QueryLog, fmt.Sprintf("peer:%s", peerName)) + } else { + m.QueryLog = append(m.QueryLog, fmt.Sprintf("%s:%s", dc, "PreparedQuery.ExecuteRemote")) } + reply.PeerName = peerName + reply.Datacenter = dc + if m.QueryFn != nil { - return m.QueryFn(dc, args, reply) + return m.QueryFn(args, reply) } return nil } @@ -2788,7 +2963,7 @@ func TestPreparedQuery_queryFailover(t *testing.T) { query := &structs.PreparedQuery{ Name: "test", Service: structs.ServiceQuery{ - Failover: structs.QueryDatacenterOptions{ + Failover: structs.QueryFailoverOptions{ NearestN: 0, Datacenters: []string{""}, }, @@ -2862,10 +3037,9 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc1" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc1" { + reply.Nodes = nodes() } return nil }, @@ -2890,10 +3064,9 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc3" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc3" { + reply.Nodes = nodes() } return nil }, @@ -2926,7 +3099,7 @@ func TestPreparedQuery_queryFailover(t *testing.T) { } if len(reply.Nodes) != 0 || reply.Datacenter != "xxx" || reply.Failovers != 4 { - t.Fatalf("bad: %v", reply) + t.Fatalf("bad: %+v", reply) } if queries := mock.JoinQueryLog(); queries != "dc1:PreparedQuery.ExecuteRemote|dc2:PreparedQuery.ExecuteRemote|dc3:PreparedQuery.ExecuteRemote|xxx:PreparedQuery.ExecuteRemote" { t.Fatalf("bad: %s", queries) @@ -2940,10 +3113,9 @@ func TestPreparedQuery_queryFailover(t 
*testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc4" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc4" { + reply.Nodes = nodes() } return nil }, @@ -2969,10 +3141,9 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc4" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc4" { + reply.Nodes = nodes() } return nil }, @@ -2998,10 +3169,9 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc4" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc4" { + reply.Nodes = nodes() } return nil }, @@ -3029,12 +3199,11 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "dc1" { + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "dc1" { return fmt.Errorf("XXX") - } else if dc == "dc4" { - ret.Nodes = nodes() + } else if req.Datacenter == "dc4" { + reply.Nodes = nodes() } return nil }, @@ -3063,10 +3232,9 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, _ interface{}, reply interface{}) error { - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "xxx" { - ret.Nodes = nodes() + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "xxx" { + reply.Nodes = nodes() } return nil }, @@ -3092,17 +3260,15 @@ func TestPreparedQuery_queryFailover(t *testing.T) { { mock := &mockQueryServer{ Datacenters: []string{"dc1", "dc2", "dc3", "xxx", "dc4"}, - QueryFn: func(dc string, args interface{}, reply interface{}) error { - inp := args.(*structs.PreparedQueryExecuteRemoteRequest) - ret := reply.(*structs.PreparedQueryExecuteResponse) - if dc == "xxx" { - if inp.Limit != 5 { - t.Fatalf("bad: %d", inp.Limit) + QueryFn: func(req *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if req.Datacenter == "xxx" { + if req.Limit != 5 { + t.Fatalf("bad: %d", req.Limit) } - if inp.RequireConsistent != true { - t.Fatalf("bad: %v", inp.RequireConsistent) + if req.RequireConsistent != true { + t.Fatalf("bad: %v", req.RequireConsistent) } - ret.Nodes = nodes() + reply.Nodes = nodes() } return nil }, @@ -3124,4 +3290,32 @@ func TestPreparedQuery_queryFailover(t *testing.T) { t.Fatalf("bad: %s", 
queries) } } + + // Failover returns data from the first cluster peer with data. + query.Service.Failover.Datacenters = nil + query.Service.Failover.Targets = []structs.QueryFailoverTarget{ + {PeerName: "cluster-01"}, + {Datacenter: "dc44"}, + {PeerName: "cluster-02"}, + } + { + mock := &mockQueryServer{ + Datacenters: []string{"dc44"}, + QueryFn: func(args *structs.PreparedQueryExecuteRemoteRequest, reply *structs.PreparedQueryExecuteResponse) error { + if args.Query.Service.PeerName == "cluster-02" { + reply.Nodes = nodes() + } + return nil + }, + } + + var reply structs.PreparedQueryExecuteResponse + if err := queryFailover(mock, query, &structs.PreparedQueryExecuteRequest{}, &reply); err != nil { + t.Fatalf("err: %v", err) + } + require.Equal(t, "cluster-02", reply.PeerName) + require.Equal(t, 3, reply.Failovers) + require.Equal(t, nodes(), reply.Nodes) + require.Equal(t, "peer:cluster-01|dc44:PreparedQuery.ExecuteRemote|peer:cluster-02", mock.JoinQueryLog()) + } } diff --git a/agent/consul/server.go b/agent/consul/server.go index a5708e030..a14253d80 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -127,6 +127,7 @@ const ( virtualIPCheckRoutineName = "virtual IP version check" peeringStreamsRoutineName = "streaming peering resources" peeringDeletionRoutineName = "peering deferred deletion" + peeringStreamsMetricsRoutineName = "metrics for streaming peering resources" ) var ( @@ -367,8 +368,9 @@ type Server struct { // peeringBackend is shared between the external and internal gRPC services for peering peeringBackend *PeeringBackend - // peerStreamServer is a server used to handle peering streams - peerStreamServer *peerstream.Server + // peerStreamServer is a server used to handle peering streams from external clusters. + peerStreamServer *peerstream.Server + // peeringServer handles peering RPC requests internal to this cluster, like generating peering tokens. 
peeringServer *peering.Server peerStreamTracker *peerstream.Tracker @@ -792,6 +794,7 @@ func newGRPCHandlerFromConfig(deps Deps, config *Config, s *Server) connHandler }, Datacenter: config.Datacenter, ConnectEnabled: config.ConnectEnabled, + PeeringEnabled: config.PeeringEnabled, }) s.peeringServer = p diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index 77f761f68..b9f9cc4f1 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -25,6 +25,7 @@ import ( "github.com/hashicorp/consul-net-rpc/net/rpc" "github.com/hashicorp/consul/agent/connect" + external "github.com/hashicorp/consul/agent/grpc-external" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/rpc/middleware" "github.com/hashicorp/consul/agent/structs" @@ -299,8 +300,7 @@ func newServerWithDeps(t *testing.T, c *Config, deps Deps) (*Server, error) { } } - srv, err := NewServer(c, deps, grpc.NewServer()) - + srv, err := NewServer(c, deps, external.NewServer(deps.Logger.Named("grpc.external"), deps.TLSConfigurator)) if err != nil { return nil, err } diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go index 2f116ab0f..849d0820c 100644 --- a/agent/consul/state/catalog.go +++ b/agent/consul/state/catalog.go @@ -1990,7 +1990,7 @@ func (s *Store) deleteServiceTxn(tx WriteTxn, idx uint64, nodeName, serviceID st } } psn := structs.PeeredServiceName{Peer: svc.PeerName, ServiceName: name} - if err := freeServiceVirtualIP(tx, psn, nil); err != nil { + if err := freeServiceVirtualIP(tx, idx, psn, nil); err != nil { return fmt.Errorf("failed to clean up virtual IP for %q: %v", name.String(), err) } if err := cleanupKindServiceName(tx, idx, svc.CompoundServiceName(), svc.ServiceKind); err != nil { @@ -2008,6 +2008,7 @@ func (s *Store) deleteServiceTxn(tx WriteTxn, idx uint64, nodeName, serviceID st // is removed. func freeServiceVirtualIP( tx WriteTxn, + idx uint64, psn structs.PeeredServiceName, excludeGateway *structs.ServiceName, ) error { @@ -2059,6 +2060,10 @@ func freeServiceVirtualIP( return fmt.Errorf("failed updating freed virtual IP table: %v", err) } + if err := updateVirtualIPMaxIndexes(tx, idx, psn.ServiceName.PartitionOrDefault(), psn.Peer); err != nil { + return err + } + return nil } @@ -2907,6 +2912,25 @@ func (s *Store) GatewayServices(ws memdb.WatchSet, gateway string, entMeta *acl. return lib.MaxUint64(maxIdx, idx), results, nil } +// TODO: Find a way to consolidate this with CheckIngressServiceNodes +// ServiceGateways is used to query all gateways associated with a service +func (s *Store) ServiceGateways(ws memdb.WatchSet, service string, kind structs.ServiceKind, entMeta acl.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) { + tx := s.db.Txn(false) + defer tx.Abort() + + // tableGatewayServices is not peer-aware, and the existence of TG/IG gateways is scrubbed during peer replication. 
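+ // Because of that, the lookup below is always scoped to the local cluster via DefaultPeerKeyword.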
+ maxIdx, nodes, err := serviceGatewayNodes(tx, ws, service, kind, &entMeta, structs.DefaultPeerKeyword) + + // Watch for index changes to the gateway nodes + idx, chans := maxIndexAndWatchChsForServiceNodes(tx, nodes, false) + for _, ch := range chans { + ws.Add(ch) + } + maxIdx = lib.MaxUint64(maxIdx, idx) + + return parseCheckServiceNodes(tx, ws, maxIdx, nodes, &entMeta, structs.DefaultPeerKeyword, err) +} + func (s *Store) VirtualIPForService(psn structs.PeeredServiceName) (string, error) { tx := s.db.Txn(false) defer tx.Abort() @@ -3478,7 +3502,7 @@ func updateTerminatingGatewayVirtualIPs(tx WriteTxn, idx uint64, conf *structs.T } if len(nodes) == 0 { psn := structs.PeeredServiceName{Peer: structs.DefaultPeerKeyword, ServiceName: sn} - if err := freeServiceVirtualIP(tx, psn, &gatewayName); err != nil { + if err := freeServiceVirtualIP(tx, idx, psn, &gatewayName); err != nil { return err } } @@ -3862,7 +3886,7 @@ func (s *Store) collectGatewayServices(tx ReadTxn, ws memdb.WatchSet, iter memdb return maxIdx, results, nil } -// TODO(ingress): How to handle index rolling back when a config entry is +// TODO: How to handle index rolling back when a config entry is // deleted that references a service? // We might need something like the service_last_extinction index? func serviceGatewayNodes(tx ReadTxn, ws memdb.WatchSet, service string, kind structs.ServiceKind, entMeta *acl.EnterpriseMeta, peerName string) (uint64, structs.ServiceNodes, error) { diff --git a/agent/consul/state/catalog_events.go b/agent/consul/state/catalog_events.go index b4b498b98..06d6414af 100644 --- a/agent/consul/state/catalog_events.go +++ b/agent/consul/state/catalog_events.go @@ -9,6 +9,7 @@ import ( "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/stream" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/proto/pbcommon" "github.com/hashicorp/consul/proto/pbservice" "github.com/hashicorp/consul/proto/pbsubscribe" ) @@ -71,6 +72,39 @@ func (e EventPayloadCheckServiceNode) ToSubscriptionEvent(idx uint64) *pbsubscri } } +// EventPayloadServiceListUpdate is used as the Payload for a stream.Event when +// services (not service instances) are registered/deregistered. These events +// are used to materialize the list of services in a datacenter. +type EventPayloadServiceListUpdate struct { + Op pbsubscribe.CatalogOp + + Name string + EnterpriseMeta acl.EnterpriseMeta + PeerName string +} + +func (e *EventPayloadServiceListUpdate) ToSubscriptionEvent(idx uint64) *pbsubscribe.Event { + return &pbsubscribe.Event{ + Index: idx, + Payload: &pbsubscribe.Event_Service{ + Service: &pbsubscribe.ServiceListUpdate{ + Op: e.Op, + Name: e.Name, + EnterpriseMeta: pbcommon.NewEnterpriseMetaFromStructs(e.EnterpriseMeta), + PeerName: e.PeerName, + }, + }, + } +} + +func (e *EventPayloadServiceListUpdate) Subject() stream.Subject { return stream.SubjectNone } + +func (e *EventPayloadServiceListUpdate) HasReadPermission(authz acl.Authorizer) bool { + var authzContext acl.AuthorizerContext + e.EnterpriseMeta.FillAuthzContext(&authzContext) + return authz.ServiceRead(e.Name, &authzContext) == acl.Allow +} + // serviceHealthSnapshot returns a stream.SnapshotFunc that provides a snapshot // of stream.Events that describe the current state of a service health query. 
func (s *Store) ServiceHealthSnapshot(req stream.SubscribeRequest, buf stream.SnapshotAppender) (index uint64, err error) { @@ -156,6 +190,65 @@ type nodeTuple struct { var serviceChangeIndirect = serviceChange{changeType: changeIndirect} +// ServiceListUpdateEventsFromChanges returns events representing changes to +// the list of services from the given set of state store changes. +func ServiceListUpdateEventsFromChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { + var events []stream.Event + for _, change := range changes.Changes { + if change.Table != tableKindServiceNames { + continue + } + + kindName := changeObject(change).(*KindServiceName) + + // TODO(peering): make this peer-aware. + payload := &EventPayloadServiceListUpdate{ + Name: kindName.Service.Name, + EnterpriseMeta: kindName.Service.EnterpriseMeta, + } + + if change.Deleted() { + payload.Op = pbsubscribe.CatalogOp_Deregister + } else { + payload.Op = pbsubscribe.CatalogOp_Register + } + + events = append(events, stream.Event{ + Topic: EventTopicServiceList, + Index: changes.Index, + Payload: payload, + }) + } + return events, nil +} + +// ServiceListSnapshot is a stream.SnapshotFunc that returns a snapshot of +// all service names. +func (s *Store) ServiceListSnapshot(_ stream.SubscribeRequest, buf stream.SnapshotAppender) (uint64, error) { + index, names, err := s.ServiceNamesOfKind(nil, "") + if err != nil { + return 0, err + } + + if l := len(names); l > 0 { + events := make([]stream.Event, l) + for idx, name := range names { + events[idx] = stream.Event{ + Topic: EventTopicServiceList, + Index: index, + Payload: &EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Register, + Name: name.Service.Name, + EnterpriseMeta: name.Service.EnterpriseMeta, + }, + } + } + buf.Append(events) + } + + return index, nil +} + // ServiceHealthEventsFromChanges returns all the service and Connect health // events that should be emitted given a set of changes to the state store. 
func ServiceHealthEventsFromChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { diff --git a/agent/consul/state/catalog_events_test.go b/agent/consul/state/catalog_events_test.go index 129b834b8..d4e1175cd 100644 --- a/agent/consul/state/catalog_events_test.go +++ b/agent/consul/state/catalog_events_test.go @@ -8,6 +8,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" + "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/stream" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/api" @@ -1674,7 +1675,7 @@ func TestServiceHealthEventsFromChanges(t *testing.T) { configEntryDest := &structs.ServiceConfigEntry{ Kind: structs.ServiceDefaults, Name: "destination1", - Destination: &structs.DestinationConfig{Port: 9000, Address: "kafka.test.com"}, + Destination: &structs.DestinationConfig{Port: 9000, Addresses: []string{"kafka.test.com"}}, } return ensureConfigEntryTxn(tx, tx.Index, configEntryDest) }, @@ -1720,7 +1721,7 @@ func TestServiceHealthEventsFromChanges(t *testing.T) { configEntryDest := &structs.ServiceConfigEntry{ Kind: structs.ServiceDefaults, Name: "destination1", - Destination: &structs.DestinationConfig{Port: 9000, Address: "kafka.test.com"}, + Destination: &structs.DestinationConfig{Port: 9000, Addresses: []string{"kafka.test.com"}}, } return ensureConfigEntryTxn(tx, tx.Index, configEntryDest) }, @@ -2543,3 +2544,114 @@ func newPayloadCheckServiceNodeWithOverride( overrideNamespace: overrideNamespace, } } + +func TestServiceListUpdateSnapshot(t *testing.T) { + const index uint64 = 123 + + store := testStateStore(t) + require.NoError(t, store.EnsureRegistration(index, testServiceRegistration(t, "db"))) + + buf := &snapshotAppender{} + idx, err := store.ServiceListSnapshot(stream.SubscribeRequest{Subject: stream.SubjectNone}, buf) + require.NoError(t, err) + require.NotZero(t, idx) + + require.Len(t, buf.events, 1) + require.Len(t, buf.events[0], 1) + + payload := buf.events[0][0].Payload.(*EventPayloadServiceListUpdate) + require.Equal(t, pbsubscribe.CatalogOp_Register, payload.Op) + require.Equal(t, "db", payload.Name) +} + +func TestServiceListUpdateEventsFromChanges(t *testing.T) { + const changeIndex = 123 + + testCases := map[string]struct { + setup func(*Store, *txn) error + mutate func(*Store, *txn) error + events []stream.Event + }{ + "register new service": { + mutate: func(store *Store, tx *txn) error { + return store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db"), false) + }, + events: []stream.Event{ + { + Topic: EventTopicServiceList, + Index: changeIndex, + Payload: &EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Register, + Name: "db", + EnterpriseMeta: *acl.DefaultEnterpriseMeta(), + }, + }, + }, + }, + "service already registered": { + setup: func(store *Store, tx *txn) error { + return store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db"), false) + }, + mutate: func(store *Store, tx *txn) error { + return store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db"), false) + }, + events: nil, + }, + "deregister last instance of service": { + setup: func(store *Store, tx *txn) error { + return store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db"), false) + }, + mutate: func(store *Store, tx *txn) error { + return store.deleteServiceTxn(tx, tx.Index, "node1", "db", nil, "") + }, + events: []stream.Event{ + { + Topic: EventTopicServiceList, + 
Index: changeIndex, + Payload: &EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Deregister, + Name: "db", + EnterpriseMeta: *acl.DefaultEnterpriseMeta(), + }, + }, + }, + }, + "deregister (not the last) instance of service": { + setup: func(store *Store, tx *txn) error { + if err := store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db"), false); err != nil { + return err + } + if err := store.ensureRegistrationTxn(tx, changeIndex, false, testServiceRegistration(t, "db", regNode2), false); err != nil { + return err + } + return nil + }, + mutate: func(store *Store, tx *txn) error { + return store.deleteServiceTxn(tx, tx.Index, "node1", "db", nil, "") + }, + events: nil, + }, + } + for desc, tc := range testCases { + t.Run(desc, func(t *testing.T) { + store := testStateStore(t) + + if tc.setup != nil { + tx := store.db.WriteTxn(0) + require.NoError(t, tc.setup(store, tx)) + require.NoError(t, tx.Commit()) + } + + tx := store.db.WriteTxn(0) + t.Cleanup(tx.Abort) + + if tc.mutate != nil { + require.NoError(t, tc.mutate(store, tx)) + } + + events, err := ServiceListUpdateEventsFromChanges(tx, Changes{Index: changeIndex, Changes: tx.Changes()}) + require.NoError(t, err) + require.Equal(t, tc.events, events) + }) + } +} diff --git a/agent/consul/state/catalog_oss_test.go b/agent/consul/state/catalog_oss_test.go index 2603e85b7..0fa912973 100644 --- a/agent/consul/state/catalog_oss_test.go +++ b/agent/consul/state/catalog_oss_test.go @@ -34,11 +34,11 @@ func testIndexerTableChecks() map[string]indexerTestCase { Node: "NoDe", CheckID: "CheckId", }, - expected: []byte("internal\x00node\x00checkid\x00"), + expected: []byte("~\x00node\x00checkid\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00node\x00checkid\x00"), + expected: []byte("~\x00node\x00checkid\x00"), }, prefix: []indexValue{ { @@ -47,7 +47,7 @@ func testIndexerTableChecks() map[string]indexerTestCase { }, { source: Query{Value: "nOdE"}, - expected: []byte("internal\x00node\x00"), + expected: []byte("~\x00node\x00"), }, }, extra: []indexerTestCase{ @@ -77,11 +77,11 @@ func testIndexerTableChecks() map[string]indexerTestCase { indexStatus: { read: indexValue{ source: Query{Value: "PASSING"}, - expected: []byte("internal\x00passing\x00"), + expected: []byte("~\x00passing\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00passing\x00"), + expected: []byte("~\x00passing\x00"), }, extra: []indexerTestCase{ { @@ -99,11 +99,11 @@ func testIndexerTableChecks() map[string]indexerTestCase { indexService: { read: indexValue{ source: Query{Value: "ServiceName"}, - expected: []byte("internal\x00servicename\x00"), + expected: []byte("~\x00servicename\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00servicename\x00"), + expected: []byte("~\x00servicename\x00"), }, extra: []indexerTestCase{ { @@ -124,11 +124,11 @@ func testIndexerTableChecks() map[string]indexerTestCase { Node: "NoDe", Service: "SeRvIcE", }, - expected: []byte("internal\x00node\x00service\x00"), + expected: []byte("~\x00node\x00service\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00node\x00service\x00"), + expected: []byte("~\x00node\x00service\x00"), }, extra: []indexerTestCase{ { @@ -152,11 +152,11 @@ func testIndexerTableChecks() map[string]indexerTestCase { source: Query{ Value: "NoDe", }, - expected: []byte("internal\x00node\x00"), + expected: []byte("~\x00node\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00node\x00"), 
+ expected: []byte("~\x00node\x00"), }, extra: []indexerTestCase{ { @@ -272,11 +272,11 @@ func testIndexerTableNodes() map[string]indexerTestCase { indexID: { read: indexValue{ source: Query{Value: "NoDeId"}, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, write: indexValue{ source: &structs.Node{Node: "NoDeId"}, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, prefix: []indexValue{ { @@ -289,11 +289,11 @@ func testIndexerTableNodes() map[string]indexerTestCase { }, { source: Query{Value: "NoDeId"}, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, { source: Query{}, - expected: []byte("internal\x00"), + expected: []byte("~\x00"), }, }, extra: []indexerTestCase{ @@ -322,27 +322,27 @@ func testIndexerTableNodes() map[string]indexerTestCase { indexUUID: { read: indexValue{ source: Query{Value: uuid}, - expected: append([]byte("internal\x00"), uuidBuf...), + expected: append([]byte("~\x00"), uuidBuf...), }, write: indexValue{ source: &structs.Node{ ID: types.NodeID(uuid), Node: "NoDeId", }, - expected: append([]byte("internal\x00"), uuidBuf...), + expected: append([]byte("~\x00"), uuidBuf...), }, prefix: []indexValue{ { // partial length source: Query{Value: uuid[:6]}, - expected: append([]byte("internal\x00"), uuidBuf[:3]...), + expected: append([]byte("~\x00"), uuidBuf[:3]...), }, { // full length source: Query{Value: uuid}, - expected: append([]byte("internal\x00"), uuidBuf...), + expected: append([]byte("~\x00"), uuidBuf...), }, { source: Query{}, - expected: []byte("internal\x00"), + expected: []byte("~\x00"), }, }, extra: []indexerTestCase{ @@ -382,7 +382,7 @@ func testIndexerTableNodes() map[string]indexerTestCase { Key: "KeY", Value: "VaLuE", }, - expected: []byte("internal\x00KeY\x00VaLuE\x00"), + expected: []byte("~\x00KeY\x00VaLuE\x00"), }, writeMulti: indexValueMulti{ source: &structs.Node{ @@ -393,8 +393,8 @@ func testIndexerTableNodes() map[string]indexerTestCase { }, }, expected: [][]byte{ - []byte("internal\x00MaP-kEy-1\x00mAp-VaL-1\x00"), - []byte("internal\x00mAp-KeY-2\x00MaP-vAl-2\x00"), + []byte("~\x00MaP-kEy-1\x00mAp-VaL-1\x00"), + []byte("~\x00mAp-KeY-2\x00MaP-vAl-2\x00"), }, }, extra: []indexerTestCase{ @@ -449,11 +449,11 @@ func testIndexerTableServices() map[string]indexerTestCase { Node: "NoDeId", Service: "SeRvIcE", }, - expected: []byte("internal\x00nodeid\x00service\x00"), + expected: []byte("~\x00nodeid\x00service\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00nodeid\x00service\x00"), + expected: []byte("~\x00nodeid\x00service\x00"), }, prefix: []indexValue{ { @@ -466,11 +466,11 @@ func testIndexerTableServices() map[string]indexerTestCase { }, { source: Query{}, - expected: []byte("internal\x00"), + expected: []byte("~\x00"), }, { source: Query{Value: "NoDeId"}, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, }, extra: []indexerTestCase{ @@ -505,11 +505,11 @@ func testIndexerTableServices() map[string]indexerTestCase { source: Query{ Value: "NoDeId", }, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00nodeid\x00"), + expected: []byte("~\x00nodeid\x00"), }, extra: []indexerTestCase{ { @@ -530,11 +530,11 @@ func testIndexerTableServices() map[string]indexerTestCase { indexService: { read: indexValue{ source: Query{Value: "ServiceName"}, - expected: 
[]byte("internal\x00servicename\x00"), + expected: []byte("~\x00servicename\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00servicename\x00"), + expected: []byte("~\x00servicename\x00"), }, extra: []indexerTestCase{ { @@ -552,14 +552,14 @@ func testIndexerTableServices() map[string]indexerTestCase { indexConnect: { read: indexValue{ source: Query{Value: "ConnectName"}, - expected: []byte("internal\x00connectname\x00"), + expected: []byte("~\x00connectname\x00"), }, write: indexValue{ source: &structs.ServiceNode{ ServiceName: "ConnectName", ServiceConnect: structs.ServiceConnect{Native: true}, }, - expected: []byte("internal\x00connectname\x00"), + expected: []byte("~\x00connectname\x00"), }, extra: []indexerTestCase{ { @@ -571,7 +571,7 @@ func testIndexerTableServices() map[string]indexerTestCase { DestinationServiceName: "ConnectName", }, }, - expected: []byte("internal\x00connectname\x00"), + expected: []byte("~\x00connectname\x00"), }, }, { @@ -621,13 +621,13 @@ func testIndexerTableServices() map[string]indexerTestCase { indexKind: { read: indexValue{ source: Query{Value: "connect-proxy"}, - expected: []byte("internal\x00connect-proxy\x00"), + expected: []byte("~\x00connect-proxy\x00"), }, write: indexValue{ source: &structs.ServiceNode{ ServiceKind: structs.ServiceKindConnectProxy, }, - expected: []byte("internal\x00connect-proxy\x00"), + expected: []byte("~\x00connect-proxy\x00"), }, extra: []indexerTestCase{ { @@ -636,7 +636,7 @@ func testIndexerTableServices() map[string]indexerTestCase { ServiceName: "ServiceName", ServiceKind: structs.ServiceKindTypical, }, - expected: []byte("internal\x00\x00"), + expected: []byte("~\x00\x00"), }, }, { @@ -694,18 +694,18 @@ func testIndexerTableServiceVirtualIPs() map[string]indexerTestCase { Name: "foo", }, }, - expected: []byte("internal\x00foo\x00"), + expected: []byte("~\x00foo\x00"), }, write: indexValue{ source: obj, - expected: []byte("internal\x00foo\x00"), + expected: []byte("~\x00foo\x00"), }, prefix: []indexValue{ { source: Query{ Value: "foo", }, - expected: []byte("internal\x00foo\x00"), + expected: []byte("~\x00foo\x00"), }, { source: Query{ diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go index d2a970b07..fed3bd0ee 100644 --- a/agent/consul/state/catalog_test.go +++ b/agent/consul/state/catalog_test.go @@ -4,6 +4,7 @@ import ( "context" crand "crypto/rand" "fmt" + "github.com/hashicorp/consul/acl" "reflect" "sort" "strings" @@ -5346,6 +5347,400 @@ func TestStateStore_GatewayServices_Terminating(t *testing.T) { assert.Len(t, out, 0) } +func TestStateStore_ServiceGateways_Terminating(t *testing.T) { + s := testStateStore(t) + + // Listing with no results returns an empty list. 
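+ // (This initial empty check goes through GatewayServices; ServiceGateways is exercised once the gateway mappings exist below.)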
+ ws := memdb.NewWatchSet() + idx, nodes, err := s.GatewayServices(ws, "db", nil) + assert.Nil(t, err) + assert.Equal(t, uint64(0), idx) + assert.Len(t, nodes, 0) + + // Create some nodes + assert.Nil(t, s.EnsureNode(10, &structs.Node{Node: "foo", Address: "127.0.0.1"})) + assert.Nil(t, s.EnsureNode(11, &structs.Node{Node: "bar", Address: "127.0.0.2"})) + assert.Nil(t, s.EnsureNode(12, &structs.Node{Node: "baz", Address: "127.0.0.2"})) + + // Typical services and some consul services spread across two nodes + assert.Nil(t, s.EnsureService(13, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000})) + assert.Nil(t, s.EnsureService(15, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000})) + assert.Nil(t, s.EnsureService(16, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + assert.Nil(t, s.EnsureService(17, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + + // Add ingress gateway and a connect proxy, neither should get picked up by terminating gateway + ingressNS := &structs.NodeService{ + Kind: structs.ServiceKindIngressGateway, + ID: "ingress", + Service: "ingress", + Port: 8443, + } + assert.Nil(t, s.EnsureService(18, "baz", ingressNS)) + + proxyNS := &structs.NodeService{ + Kind: structs.ServiceKindConnectProxy, + ID: "db proxy", + Service: "db proxy", + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: "db", + }, + Port: 8000, + } + assert.Nil(t, s.EnsureService(19, "foo", proxyNS)) + + // Register a gateway + assert.Nil(t, s.EnsureService(20, "baz", &structs.NodeService{Kind: structs.ServiceKindTerminatingGateway, ID: "gateway", Service: "gateway", Port: 443})) + + // Associate gateway with db and api + assert.Nil(t, s.EnsureConfigEntry(21, &structs.TerminatingGatewayConfigEntry{ + Kind: "terminating-gateway", + Name: "gateway", + Services: []structs.LinkedService{ + { + Name: "db", + }, + { + Name: "api", + }, + }, + })) + assert.True(t, watchFired(ws)) + + // Read everything back. 
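+ // The terminating gateway registered on node "baz" should now be returned for the "db" service.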
+ ws = memdb.NewWatchSet() + idx, out, err := s.ServiceGateways(ws, "db", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + assert.Equal(t, uint64(21), idx) + assert.Len(t, out, 1) + + expect := structs.CheckServiceNodes{ + { + Node: &structs.Node{ + ID: "", + Address: "127.0.0.2", + Node: "baz", + Partition: acl.DefaultPartitionName, + RaftIndex: structs.RaftIndex{ + CreateIndex: 12, + ModifyIndex: 12, + }, + }, + Service: &structs.NodeService{ + Service: "gateway", + Kind: structs.ServiceKindTerminatingGateway, + ID: "gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + RaftIndex: structs.RaftIndex{ + CreateIndex: 20, + ModifyIndex: 20, + }, + }, + }, + } + assert.Equal(t, expect, out) + + // Check that we don't update on same exact config + assert.Nil(t, s.EnsureConfigEntry(21, &structs.TerminatingGatewayConfigEntry{ + Kind: "terminating-gateway", + Name: "gateway", + Services: []structs.LinkedService{ + { + Name: "db", + }, + { + Name: "api", + }, + }, + })) + assert.False(t, watchFired(ws)) + + idx, out, err = s.ServiceGateways(ws, "api", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + assert.Equal(t, uint64(21), idx) + assert.Len(t, out, 1) + + expect = structs.CheckServiceNodes{ + { + Node: &structs.Node{ + ID: "", + Address: "127.0.0.2", + Node: "baz", + Partition: acl.DefaultPartitionName, + RaftIndex: structs.RaftIndex{ + CreateIndex: 12, + ModifyIndex: 12, + }, + }, + Service: &structs.NodeService{ + Service: "gateway", + Kind: structs.ServiceKindTerminatingGateway, + ID: "gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + RaftIndex: structs.RaftIndex{ + CreateIndex: 20, + ModifyIndex: 20, + }, + }, + }, + } + assert.Equal(t, expect, out) + + // Associate gateway with a wildcard and add TLS config + assert.Nil(t, s.EnsureConfigEntry(22, &structs.TerminatingGatewayConfigEntry{ + Kind: "terminating-gateway", + Name: "gateway", + Services: []structs.LinkedService{ + { + Name: "api", + CAFile: "api/ca.crt", + CertFile: "api/client.crt", + KeyFile: "api/client.key", + SNI: "my-domain", + }, + { + Name: "db", + }, + { + Name: "*", + CAFile: "ca.crt", + CertFile: "client.crt", + KeyFile: "client.key", + SNI: "my-alt-domain", + }, + }, + })) + assert.True(t, watchFired(ws)) + + // Read everything back. 
+ ws = memdb.NewWatchSet() + idx, out, err = s.ServiceGateways(ws, "db", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + assert.Equal(t, uint64(22), idx) + assert.Len(t, out, 1) + + expect = structs.CheckServiceNodes{ + { + Node: &structs.Node{ + ID: "", + Address: "127.0.0.2", + Node: "baz", + Partition: acl.DefaultPartitionName, + RaftIndex: structs.RaftIndex{ + CreateIndex: 12, + ModifyIndex: 12, + }, + }, + Service: &structs.NodeService{ + Service: "gateway", + Kind: structs.ServiceKindTerminatingGateway, + ID: "gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + RaftIndex: structs.RaftIndex{ + CreateIndex: 20, + ModifyIndex: 20, + }, + }, + }, + } + assert.Equal(t, expect, out) + + // Add a service covered by wildcard + assert.Nil(t, s.EnsureService(23, "bar", &structs.NodeService{ID: "redis", Service: "redis", Tags: nil, Address: "", Port: 6379})) + + ws = memdb.NewWatchSet() + idx, out, err = s.ServiceGateways(ws, "redis", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + assert.Equal(t, uint64(23), idx) + assert.Len(t, out, 1) + + expect = structs.CheckServiceNodes{ + { + Node: &structs.Node{ + ID: "", + Address: "127.0.0.2", + Node: "baz", + Partition: acl.DefaultPartitionName, + RaftIndex: structs.RaftIndex{ + CreateIndex: 12, + ModifyIndex: 12, + }, + }, + Service: &structs.NodeService{ + Service: "gateway", + Kind: structs.ServiceKindTerminatingGateway, + ID: "gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + RaftIndex: structs.RaftIndex{ + CreateIndex: 20, + ModifyIndex: 20, + }, + }, + }, + } + assert.Equal(t, expect, out) + + // Delete a service covered by wildcard + assert.Nil(t, s.DeleteService(24, "bar", "redis", structs.DefaultEnterpriseMetaInDefaultPartition(), "")) + assert.True(t, watchFired(ws)) + + ws = memdb.NewWatchSet() + idx, out, err = s.ServiceGateways(ws, "redis", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + // TODO: wildcards don't keep the same extinction index + assert.Equal(t, uint64(0), idx) + assert.Len(t, out, 0) + + // Update the entry that only leaves one service + assert.Nil(t, s.EnsureConfigEntry(25, &structs.TerminatingGatewayConfigEntry{ + Kind: "terminating-gateway", + Name: "gateway", + Services: []structs.LinkedService{ + { + Name: "db", + }, + }, + })) + assert.True(t, watchFired(ws)) + + ws = memdb.NewWatchSet() + idx, out, err = s.ServiceGateways(ws, "db", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition()) + assert.Nil(t, err) + assert.Equal(t, uint64(25), idx) + assert.Len(t, out, 1) + + // previously associated services should not be present + expect = structs.CheckServiceNodes{ + { + Node: &structs.Node{ + ID: "", + Address: "127.0.0.2", + Node: "baz", + Partition: acl.DefaultPartitionName, + RaftIndex: structs.RaftIndex{ + CreateIndex: 12, + ModifyIndex: 12, + }, + }, + Service: &structs.NodeService{ + Service: "gateway", + Kind: structs.ServiceKindTerminatingGateway, + ID: "gateway", + EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), + Weights: &structs.Weights{Passing: 1, Warning: 1}, + Port: 443, + RaftIndex: structs.RaftIndex{ + CreateIndex: 20, + ModifyIndex: 20, + }, + }, + }, + } + 
assert.Equal(t, expect, out)
+
+	// Attempt to associate a different gateway with services that include db
+	assert.Nil(t, s.EnsureConfigEntry(26, &structs.TerminatingGatewayConfigEntry{
+		Kind: "terminating-gateway",
+		Name: "gateway2",
+		Services: []structs.LinkedService{
+			{
+				Name: "*",
+			},
+		},
+	}))
+
+	// Check that the watchset fired for the new terminating gateway node service
+	assert.Nil(t, s.EnsureService(20, "baz", &structs.NodeService{Kind: structs.ServiceKindTerminatingGateway, ID: "gateway2", Service: "gateway2", Port: 443}))
+	assert.True(t, watchFired(ws))
+
+	ws = memdb.NewWatchSet()
+	idx, out, err = s.ServiceGateways(ws, "db", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition())
+	assert.Nil(t, err)
+	assert.Equal(t, uint64(26), idx)
+	assert.Len(t, out, 2)
+
+	expect = structs.CheckServiceNodes{
+		{
+			Node: &structs.Node{
+				ID:        "",
+				Address:   "127.0.0.2",
+				Node:      "baz",
+				Partition: acl.DefaultPartitionName,
+				RaftIndex: structs.RaftIndex{
+					CreateIndex: 12,
+					ModifyIndex: 12,
+				},
+			},
+			Service: &structs.NodeService{
+				Service:        "gateway",
+				Kind:           structs.ServiceKindTerminatingGateway,
+				ID:             "gateway",
+				EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(),
+				Weights:        &structs.Weights{Passing: 1, Warning: 1},
+				Port:           443,
+				RaftIndex: structs.RaftIndex{
+					CreateIndex: 20,
+					ModifyIndex: 20,
+				},
+			},
+		},
+		{
+			Node: &structs.Node{
+				ID:        "",
+				Address:   "127.0.0.2",
+				Node:      "baz",
+				Partition: acl.DefaultPartitionName,
+				RaftIndex: structs.RaftIndex{
+					CreateIndex: 12,
+					ModifyIndex: 12,
+				},
+			},
+			Service: &structs.NodeService{
+				Service:        "gateway2",
+				Kind:           structs.ServiceKindTerminatingGateway,
+				ID:             "gateway2",
+				EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(),
+				Weights:        &structs.Weights{Passing: 1, Warning: 1},
+				Port:           443,
+				RaftIndex: structs.RaftIndex{
+					CreateIndex: 20,
+					ModifyIndex: 20,
+				},
+			},
+		},
+	}
+	assert.Equal(t, expect, out)
+
+	// Deleting all of the gateways' node services should trigger the watch and keep the raft index stable
+	assert.Nil(t, s.DeleteService(27, "baz", "gateway", structs.DefaultEnterpriseMetaInDefaultPartition(), structs.DefaultPeerKeyword))
+	assert.True(t, watchFired(ws))
+	assert.Nil(t, s.DeleteService(28, "baz", "gateway2", structs.DefaultEnterpriseMetaInDefaultPartition(), structs.DefaultPeerKeyword))
+
+	ws = memdb.NewWatchSet()
+	idx, out, err = s.ServiceGateways(ws, "db", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition())
+	assert.Nil(t, err)
+	assert.Equal(t, uint64(28), idx)
+	assert.Len(t, out, 0)
+
+	// Deleting the config entry even with a node service should remove existing mappings
+	assert.Nil(t, s.EnsureService(29, "baz", &structs.NodeService{Kind: structs.ServiceKindTerminatingGateway, ID: "gateway", Service: "gateway", Port: 443}))
+	assert.Nil(t, s.DeleteConfigEntry(30, "terminating-gateway", "gateway", nil))
+	assert.True(t, watchFired(ws))
+
+	idx, out, err = s.ServiceGateways(ws, "api", structs.ServiceKindTerminatingGateway, *structs.DefaultEnterpriseMetaInDefaultPartition())
+	assert.Nil(t, err)
+	// TODO: similar to ingress, the index can backslide if the config is deleted.
+ assert.Equal(t, uint64(28), idx) + assert.Len(t, out, 0) +} + func TestStateStore_GatewayServices_ServiceDeletion(t *testing.T) { s := testStateStore(t) diff --git a/agent/consul/state/events.go b/agent/consul/state/events.go index e59624511..2e74c44c9 100644 --- a/agent/consul/state/events.go +++ b/agent/consul/state/events.go @@ -43,6 +43,12 @@ func PBToStreamSubscribeRequest(req *pbsubscribe.SubscribeRequest, entMeta acl.E Name: named.Key, EnterpriseMeta: &entMeta, } + case EventTopicServiceList: + // Events on this topic are published to SubjectNone, but rather than + // exposing this in (and further complicating) the streaming API we rely + // on consumers passing WildcardSubject instead, which is functionally the + // same for this purpose. + return nil, fmt.Errorf("topic %s can only be consumed using WildcardSubject", EventTopicServiceList) default: return nil, fmt.Errorf("cannot construct subject for topic %s", req.Topic) } diff --git a/agent/consul/state/memdb.go b/agent/consul/state/memdb.go index 95a291061..751622977 100644 --- a/agent/consul/state/memdb.go +++ b/agent/consul/state/memdb.go @@ -184,6 +184,7 @@ var ( EventTopicServiceResolver = pbsubscribe.Topic_ServiceResolver EventTopicIngressGateway = pbsubscribe.Topic_IngressGateway EventTopicServiceIntentions = pbsubscribe.Topic_ServiceIntentions + EventTopicServiceList = pbsubscribe.Topic_ServiceList ) func processDBChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { @@ -192,6 +193,7 @@ func processDBChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { aclChangeUnsubscribeEvent, caRootsChangeEvents, ServiceHealthEventsFromChanges, + ServiceListUpdateEventsFromChanges, ConfigEntryEventsFromChanges, // TODO: add other table handlers here. } diff --git a/agent/consul/state/peering.go b/agent/consul/state/peering.go index e48684923..4bf19de69 100644 --- a/agent/consul/state/peering.go +++ b/agent/consul/state/peering.go @@ -213,6 +213,13 @@ func (s *Store) PeeringWrite(idx uint64, p *pbpeering.Peering) error { return fmt.Errorf("cannot write to peering that is marked for deletion") } + if p.State == pbpeering.PeeringState_UNDEFINED { + p.State = existing.State + } + // TODO(peering): Confirm behavior when /peering/token is called more than once. + // We may need to avoid clobbering existing values. 
+		p.ImportedServiceCount = existing.ImportedServiceCount
+		p.ExportedServiceCount = existing.ExportedServiceCount
 		p.CreateIndex = existing.CreateIndex
 		p.ModifyIndex = idx
 	} else {
@@ -346,7 +353,9 @@ func (s *Store) ExportedServicesForAllPeersByName(ws memdb.WatchSet, entMeta acl
 		}
 		m := list.ListAllDiscoveryChains()
 		if len(m) > 0 {
-			out[peering.Name] = maps.SliceOfKeys(m)
+			sns := maps.SliceOfKeys[structs.ServiceName, structs.ExportedDiscoveryChainInfo](m)
+			sort.Sort(structs.ServiceList(sns))
+			out[peering.Name] = sns
 		}
 	}
diff --git a/agent/dns_test.go b/agent/dns_test.go
index f0d82d2e7..51f2b6d54 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -6075,7 +6075,7 @@ func TestDNS_PreparedQuery_Failover(t *testing.T) {
 			Name: "my-query",
 			Service: structs.ServiceQuery{
 				Service: "db",
-				Failover: structs.QueryDatacenterOptions{
+				Failover: structs.QueryFailoverOptions{
 					Datacenters: []string{"dc2"},
 				},
 			},
diff --git a/agent/grpc-external/server.go b/agent/grpc-external/server.go
index 606dba642..751cca91c 100644
--- a/agent/grpc-external/server.go
+++ b/agent/grpc-external/server.go
@@ -5,6 +5,8 @@ import (
 	recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/keepalive"
+	"time"
 
 	agentmiddleware "github.com/hashicorp/consul/agent/grpc-middleware"
 	"github.com/hashicorp/consul/tlsutil"
@@ -25,6 +27,12 @@ func NewServer(logger agentmiddleware.Logger, tls *tlsutil.Configurator) *grpc.S
 			// Add middlware interceptors to recover in case of panics.
 			recovery.StreamServerInterceptor(recoveryOpts...),
 		),
+		grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
+			// This must be less than the keepalive.ClientParameters Time setting, otherwise
+			// the server will disconnect the client for sending too many keepalive pings.
+			// Currently the client param is set to 30s.
+			MinTime: 15 * time.Second,
+		}),
 	}
 	if tls != nil && tls.GRPCTLSConfigured() {
 		creds := credentials.NewTLS(tls.IncomingGRPCConfig())
diff --git a/agent/grpc-external/services/peerstream/health_snapshot.go b/agent/grpc-external/services/peerstream/health_snapshot.go
index c6cb3243b..a9f676827 100644
--- a/agent/grpc-external/services/peerstream/health_snapshot.go
+++ b/agent/grpc-external/services/peerstream/health_snapshot.go
@@ -8,7 +8,11 @@ import (
 // healthSnapshot represents a normalized view of a set of CheckServiceNodes
 // meant for easy comparison to aid in differential synchronization
 type healthSnapshot struct {
-	Nodes map[types.NodeID]*nodeSnapshot
+	// Nodes is a map of a node name to a nodeSnapshot. Ideally we would be able to use
+	// the types.NodeID and assume they are UUIDs for the map key but Consul doesn't
+	// require a NodeID. Therefore we must key off of the only bit of ID material
+	// that is required, which is the node name.
+ Nodes map[string]*nodeSnapshot } type nodeSnapshot struct { @@ -40,20 +44,20 @@ func newHealthSnapshot(all []structs.CheckServiceNode, partition, peerName strin } snap := &healthSnapshot{ - Nodes: make(map[types.NodeID]*nodeSnapshot), + Nodes: make(map[string]*nodeSnapshot), } for _, instance := range all { - if instance.Node.ID == "" { - panic("TODO(peering): data should always have a node ID") + if instance.Node.Node == "" { + panic("TODO(peering): data should always have a node name") } - nodeSnap, ok := snap.Nodes[instance.Node.ID] + nodeSnap, ok := snap.Nodes[instance.Node.Node] if !ok { nodeSnap = &nodeSnapshot{ Node: instance.Node, Services: make(map[structs.ServiceID]*serviceSnapshot), } - snap.Nodes[instance.Node.ID] = nodeSnap + snap.Nodes[instance.Node.Node] = nodeSnap } if instance.Service.ID == "" { diff --git a/agent/grpc-external/services/peerstream/health_snapshot_test.go b/agent/grpc-external/services/peerstream/health_snapshot_test.go index 74731b55f..afd83f220 100644 --- a/agent/grpc-external/services/peerstream/health_snapshot_test.go +++ b/agent/grpc-external/services/peerstream/health_snapshot_test.go @@ -69,8 +69,8 @@ func TestHealthSnapshot(t *testing.T) { }, }, expect: &healthSnapshot{ - Nodes: map[types.NodeID]*nodeSnapshot{ - "abc-123": { + Nodes: map[string]*nodeSnapshot{ + "abc": { Node: newNode("abc-123", "abc", "my-peer"), Services: map[structs.ServiceID]*serviceSnapshot{ structs.NewServiceID("xyz-123", nil): { @@ -88,14 +88,14 @@ func TestHealthSnapshot(t *testing.T) { name: "multiple", in: []structs.CheckServiceNode{ { - Node: newNode("abc-123", "abc", ""), + Node: newNode("", "abc", ""), Service: newService("xyz-123", 8080, ""), Checks: structs.HealthChecks{ newCheck("abc", "xyz-123", ""), }, }, { - Node: newNode("abc-123", "abc", ""), + Node: newNode("", "abc", ""), Service: newService("xyz-789", 8181, ""), Checks: structs.HealthChecks{ newCheck("abc", "xyz-789", ""), @@ -110,9 +110,9 @@ func TestHealthSnapshot(t *testing.T) { }, }, expect: &healthSnapshot{ - Nodes: map[types.NodeID]*nodeSnapshot{ - "abc-123": { - Node: newNode("abc-123", "abc", "my-peer"), + Nodes: map[string]*nodeSnapshot{ + "abc": { + Node: newNode("", "abc", "my-peer"), Services: map[structs.ServiceID]*serviceSnapshot{ structs.NewServiceID("xyz-123", nil): { Service: newService("xyz-123", 8080, "my-peer"), @@ -128,7 +128,7 @@ func TestHealthSnapshot(t *testing.T) { }, }, }, - "def-456": { + "def": { Node: newNode("def-456", "def", "my-peer"), Services: map[structs.ServiceID]*serviceSnapshot{ structs.NewServiceID("xyz-456", nil): { diff --git a/agent/grpc-external/services/peerstream/replication.go b/agent/grpc-external/services/peerstream/replication.go index f9f5ce76b..be79a23bd 100644 --- a/agent/grpc-external/services/peerstream/replication.go +++ b/agent/grpc-external/services/peerstream/replication.go @@ -5,10 +5,9 @@ import ( "fmt" "strings" - "github.com/golang/protobuf/proto" - "github.com/golang/protobuf/ptypes" "github.com/hashicorp/go-hclog" "google.golang.org/genproto/googleapis/rpc/code" + newproto "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" "github.com/hashicorp/consul/agent/cache" @@ -37,15 +36,24 @@ import ( // If there are no instances in the event, we consider that to be a de-registration. 
func makeServiceResponse( logger hclog.Logger, + mst *MutableStatus, update cache.UpdateEvent, ) (*pbpeerstream.ReplicationMessage_Response, error) { - any, csn, err := marshalToProtoAny[*pbservice.IndexedCheckServiceNodes](update.Result) + serviceName := strings.TrimPrefix(update.CorrelationID, subExportedService) + sn := structs.ServiceNameFromString(serviceName) + csn, ok := update.Result.(*pbservice.IndexedCheckServiceNodes) + if !ok { + return nil, fmt.Errorf("invalid type for service response: %T", update.Result) + } + + export := &pbpeerstream.ExportedService{ + Nodes: csn.Nodes, + } + + any, err := anypb.New(export) if err != nil { return nil, fmt.Errorf("failed to marshal: %w", err) } - - serviceName := strings.TrimPrefix(update.CorrelationID, subExportedService) - // If no nodes are present then it's due to one of: // 1. The service is newly registered or exported and yielded a transient empty update. // 2. All instances of the service were de-registered. @@ -54,8 +62,10 @@ func makeServiceResponse( // We don't distinguish when these three things occurred, but it's safe to send a DELETE Op in all cases, so we do that. // Case #1 is a no-op for the importing peer. if len(csn.Nodes) == 0 { + mst.RemoveExportedService(sn) + return &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, // TODO(peering): Nonce management Nonce: "", ResourceID: serviceName, @@ -63,9 +73,11 @@ func makeServiceResponse( }, nil } + mst.TrackExportedService(sn) + // If there are nodes in the response, we push them as an UPSERT operation. return &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, // TODO(peering): Nonce management Nonce: "", ResourceID: serviceName, @@ -84,7 +96,7 @@ func makeCARootsResponse( } return &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLRoots, + ResourceURL: pbpeerstream.TypeURLPeeringTrustBundle, // TODO(peering): Nonce management Nonce: "", ResourceID: "roots", @@ -97,13 +109,13 @@ func makeCARootsResponse( // the protobuf.Any type, the asserted T type, and any errors // during marshalling or type assertion. // `in` MUST be of type T or it returns an error. 
-func marshalToProtoAny[T proto.Message](in any) (*anypb.Any, T, error) { +func marshalToProtoAny[T newproto.Message](in any) (*anypb.Any, T, error) { typ, ok := in.(T) if !ok { var outType T return nil, typ, fmt.Errorf("input type is not %T: %T", outType, in) } - any, err := ptypes.MarshalAny(typ) + any, err := anypb.New(typ) if err != nil { return nil, typ, err } @@ -113,7 +125,9 @@ func marshalToProtoAny[T proto.Message](in any) (*anypb.Any, T, error) { func (s *Server) processResponse( peerName string, partition string, + mutableStatus *MutableStatus, resp *pbpeerstream.ReplicationMessage_Response, + logger hclog.Logger, ) (*pbpeerstream.ReplicationMessage, error) { if !pbpeerstream.KnownTypeURL(resp.ResourceURL) { err := fmt.Errorf("received response for unknown resource type %q", resp.ResourceURL) @@ -137,7 +151,7 @@ func (s *Server) processResponse( ), err } - if err := s.handleUpsert(peerName, partition, resp.ResourceURL, resp.ResourceID, resp.Resource); err != nil { + if err := s.handleUpsert(peerName, partition, mutableStatus, resp.ResourceURL, resp.ResourceID, resp.Resource, logger); err != nil { return makeNACKReply( resp.ResourceURL, resp.Nonce, @@ -149,7 +163,7 @@ func (s *Server) processResponse( return makeACKReply(resp.ResourceURL, resp.Nonce), nil case pbpeerstream.Operation_OPERATION_DELETE: - if err := s.handleDelete(peerName, partition, resp.ResourceURL, resp.ResourceID); err != nil { + if err := s.handleDelete(peerName, partition, mutableStatus, resp.ResourceURL, resp.ResourceID, logger); err != nil { return makeNACKReply( resp.ResourceURL, resp.Nonce, @@ -178,25 +192,38 @@ func (s *Server) processResponse( func (s *Server) handleUpsert( peerName string, partition string, + mutableStatus *MutableStatus, resourceURL string, resourceID string, resource *anypb.Any, + logger hclog.Logger, ) error { + if resource.TypeUrl != resourceURL { + return fmt.Errorf("mismatched resourceURL %q and Any typeUrl %q", resourceURL, resource.TypeUrl) + } + switch resourceURL { - case pbpeerstream.TypeURLService: + case pbpeerstream.TypeURLExportedService: sn := structs.ServiceNameFromString(resourceID) sn.OverridePartition(partition) - csn := &pbservice.IndexedCheckServiceNodes{} - if err := ptypes.UnmarshalAny(resource, csn); err != nil { + export := &pbpeerstream.ExportedService{} + if err := resource.UnmarshalTo(export); err != nil { return fmt.Errorf("failed to unmarshal resource: %w", err) } - return s.handleUpdateService(peerName, partition, sn, csn) + err := s.handleUpdateService(peerName, partition, sn, export) + if err != nil { + return fmt.Errorf("did not increment imported services count for service=%q: %w", sn.String(), err) + } - case pbpeerstream.TypeURLRoots: + mutableStatus.TrackImportedService(sn) + + return nil + + case pbpeerstream.TypeURLPeeringTrustBundle: roots := &pbpeering.PeeringTrustBundle{} - if err := ptypes.UnmarshalAny(resource, roots); err != nil { + if err := resource.UnmarshalTo(roots); err != nil { return fmt.Errorf("failed to unmarshal resource: %w", err) } @@ -219,7 +246,7 @@ func (s *Server) handleUpdateService( peerName string, partition string, sn structs.ServiceName, - pbNodes *pbservice.IndexedCheckServiceNodes, + export *pbpeerstream.ExportedService, ) error { // Capture instances in the state store for reconciliation later. 
_, storedInstances, err := s.GetStore().CheckServiceNodes(nil, sn.Name, &sn.EnterpriseMeta, peerName) @@ -227,7 +254,7 @@ func (s *Server) handleUpdateService( return fmt.Errorf("failed to read imported services: %w", err) } - structsNodes, err := pbNodes.CheckServiceNodesToStruct() + structsNodes, err := export.CheckServiceNodesToStruct() if err != nil { return fmt.Errorf("failed to convert protobuf instances to structs: %w", err) } @@ -290,8 +317,8 @@ func (s *Server) handleUpdateService( deletedNodeChecks = make(map[nodeCheckTuple]struct{}) ) for _, csn := range storedInstances { - if _, ok := snap.Nodes[csn.Node.ID]; !ok { - unusedNodes[string(csn.Node.ID)] = struct{}{} + if _, ok := snap.Nodes[csn.Node.Node]; !ok { + unusedNodes[csn.Node.Node] = struct{}{} // Since the node is not in the snapshot we can know the associated service // instance is not in the snapshot either, since a service instance can't @@ -316,7 +343,7 @@ func (s *Server) handleUpdateService( // Delete the service instance if not in the snapshot. sid := csn.Service.CompoundServiceID() - if _, ok := snap.Nodes[csn.Node.ID].Services[sid]; !ok { + if _, ok := snap.Nodes[csn.Node.Node].Services[sid]; !ok { err := s.Backend.CatalogDeregister(&structs.DeregisterRequest{ Node: csn.Node.Node, ServiceID: csn.Service.ID, @@ -335,7 +362,7 @@ func (s *Server) handleUpdateService( // Reconcile checks. for _, chk := range csn.Checks { - if _, ok := snap.Nodes[csn.Node.ID].Services[sid].Checks[chk.CheckID]; !ok { + if _, ok := snap.Nodes[csn.Node.Node].Services[sid].Checks[chk.CheckID]; !ok { // Checks without a ServiceID are node checks. // If the node exists but the check does not then the check was deleted. if chk.ServiceID == "" { @@ -425,14 +452,24 @@ func (s *Server) handleUpsertRoots( func (s *Server) handleDelete( peerName string, partition string, + mutableStatus *MutableStatus, resourceURL string, resourceID string, + logger hclog.Logger, ) error { switch resourceURL { - case pbpeerstream.TypeURLService: + case pbpeerstream.TypeURLExportedService: sn := structs.ServiceNameFromString(resourceID) sn.OverridePartition(partition) - return s.handleUpdateService(peerName, partition, sn, nil) + + err := s.handleUpdateService(peerName, partition, sn, nil) + if err != nil { + return err + } + + mutableStatus.RemoveImportedService(sn) + + return nil default: return fmt.Errorf("unexpected resourceURL: %s", resourceURL) diff --git a/agent/grpc-external/services/peerstream/server.go b/agent/grpc-external/services/peerstream/server.go index a71c30d31..96694d63e 100644 --- a/agent/grpc-external/services/peerstream/server.go +++ b/agent/grpc-external/services/peerstream/server.go @@ -1,6 +1,8 @@ package peerstream import ( + "time" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" "google.golang.org/grpc" @@ -17,6 +19,11 @@ import ( // TODO(peering): fix up these interfaces to be more testable now that they are // extracted from private peering +const ( + defaultOutgoingHeartbeatInterval = 15 * time.Second + defaultIncomingHeartbeatTimeout = 2 * time.Minute +) + type Server struct { Config } @@ -30,6 +37,12 @@ type Config struct { // Datacenter of the Consul server this gRPC server is hosted on Datacenter string ConnectEnabled bool + + // outgoingHeartbeatInterval is how often we send a heartbeat. + outgoingHeartbeatInterval time.Duration + + // incomingHeartbeatTimeout is how long we'll wait between receiving heartbeats before we close the connection. 
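+	//
+	// For example, the heartbeat tests (such as
+	// TestStreamResources_Server_DisconnectsOnHeartbeatTimeout) shorten this window
+	// through the test config hook; a minimal sketch of that setup:
+	//
+	//	srv, _ := newTestServer(t, func(c *Config) {
+	//		c.incomingHeartbeatTimeout = 5 * time.Millisecond
+	//	})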
+ incomingHeartbeatTimeout time.Duration } //go:generate mockery --name ACLResolver --inpackage @@ -46,6 +59,12 @@ func NewServer(cfg Config) *Server { if cfg.Datacenter == "" { panic("Datacenter is required") } + if cfg.outgoingHeartbeatInterval == 0 { + cfg.outgoingHeartbeatInterval = defaultOutgoingHeartbeatInterval + } + if cfg.incomingHeartbeatTimeout == 0 { + cfg.incomingHeartbeatTimeout = defaultIncomingHeartbeatTimeout + } return &Server{ Config: cfg, } diff --git a/agent/grpc-external/services/peerstream/stream_resources.go b/agent/grpc-external/services/peerstream/stream_resources.go index 611340082..1feb7f01d 100644 --- a/agent/grpc-external/services/peerstream/stream_resources.go +++ b/agent/grpc-external/services/peerstream/stream_resources.go @@ -5,11 +5,12 @@ import ( "fmt" "io" "strings" + "sync" + "time" "github.com/golang/protobuf/jsonpb" "github.com/golang/protobuf/proto" "github.com/hashicorp/go-hclog" - "google.golang.org/genproto/googleapis/rpc/code" "google.golang.org/grpc/codes" grpcstatus "google.golang.org/grpc/status" @@ -99,11 +100,12 @@ func (s *Server) StreamResources(stream pbpeerstream.PeerStreamService_StreamRes } streamReq := HandleStreamRequest{ - LocalID: p.ID, - RemoteID: "", - PeerName: p.Name, - Partition: p.Partition, - Stream: stream, + LocalID: p.ID, + RemoteID: "", + PeerName: p.Name, + Partition: p.Partition, + InitialResourceURL: req.ResourceURL, + Stream: stream, } err = s.HandleStream(streamReq) // A nil error indicates that the peering was deleted and the stream needs to be gracefully shutdown. @@ -129,6 +131,9 @@ type HandleStreamRequest struct { // Partition is the local partition associated with the peer. Partition string + // InitialResourceURL is the ResourceURL from the initial Request. + InitialResourceURL string + // Stream is the open stream to the peer cluster. Stream BidirectionalStream } @@ -155,9 +160,19 @@ func (s *Server) DrainStream(req HandleStreamRequest) { } } +func (s *Server) HandleStream(streamReq HandleStreamRequest) error { + if err := s.realHandleStream(streamReq); err != nil { + s.Tracker.DisconnectedDueToError(streamReq.LocalID, err.Error()) + return err + } + // TODO(peering) Also need to clear subscriptions associated with the peer + s.Tracker.DisconnectedGracefully(streamReq.LocalID) + return nil +} + // The localID provided is the locally-generated identifier for the peering. // The remoteID is an identifier that the remote peer recognizes for the peering. -func (s *Server) HandleStream(streamReq HandleStreamRequest) error { +func (s *Server) realHandleStream(streamReq HandleStreamRequest) error { // TODO: pass logger down from caller? logger := s.Logger.Named("stream"). With("peer_name", streamReq.PeerName). 
@@ -170,9 +185,6 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { return fmt.Errorf("failed to register stream: %v", err) } - // TODO(peering) Also need to clear subscriptions associated with the peer - defer s.Tracker.Disconnected(streamReq.LocalID) - var trustDomain string if s.ConnectEnabled { // Read the TrustDomain up front - we do not allow users to change the ClusterID @@ -183,6 +195,13 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { } } + remoteSubTracker := newResourceSubscriptionTracker() + if streamReq.InitialResourceURL != "" { + if remoteSubTracker.Subscribe(streamReq.InitialResourceURL) { + logger.Info("subscribing to resource type", "resourceURL", streamReq.InitialResourceURL) + } + } + mgr := newSubscriptionManager( streamReq.Stream.Context(), logger, @@ -190,24 +209,46 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { trustDomain, s.Backend, s.GetStore, + remoteSubTracker, ) subCh := mgr.subscribe(streamReq.Stream.Context(), streamReq.LocalID, streamReq.PeerName, streamReq.Partition) - sub := makeReplicationRequest(&pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, - PeerID: streamReq.RemoteID, - }) - logTraceSend(logger, sub) + // We need a mutex to protect against simultaneous sends to the client. + var sendMutex sync.Mutex - if err := streamReq.Stream.Send(sub); err != nil { - if err == io.EOF { - logger.Info("stream ended by peer") - status.TrackReceiveError(err.Error()) - return nil + // streamSend is a helper function that sends msg over the stream + // respecting the send mutex. It also logs the send and calls status.TrackSendError + // on error. + streamSend := func(msg *pbpeerstream.ReplicationMessage) error { + logTraceSend(logger, msg) + + sendMutex.Lock() + err := streamReq.Stream.Send(msg) + sendMutex.Unlock() + + if err != nil { + status.TrackSendError(err.Error()) + } + return err + } + + // Subscribe to all relevant resource types. + for _, resourceURL := range []string{ + pbpeerstream.TypeURLExportedService, + pbpeerstream.TypeURLPeeringTrustBundle, + } { + sub := makeReplicationRequest(&pbpeerstream.ReplicationMessage_Request{ + ResourceURL: resourceURL, + PeerID: streamReq.RemoteID, + }) + if err := streamSend(sub); err != nil { + if err == io.EOF { + logger.Info("stream ended by peer") + return nil + } + // TODO(peering) Test error handling in calls to Send/Recv + return fmt.Errorf("failed to send subscription for %q to stream: %w", resourceURL, err) } - // TODO(peering) Test error handling in calls to Send/Recv - status.TrackSendError(err.Error()) - return fmt.Errorf("failed to send to stream: %v", err) } // TODO(peering): Should this be buffered? @@ -224,15 +265,49 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { if err == io.EOF { logger.Info("stream ended by peer") - status.TrackReceiveError(err.Error()) + status.TrackRecvError(err.Error()) return } logger.Error("failed to receive from stream", "error", err) - status.TrackReceiveError(err.Error()) + status.TrackRecvError(err.Error()) return } }() + // Heartbeat sender. 
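+	// It sends an empty Heartbeat message every outgoingHeartbeatInterval (15s by
+	// default) so that the peer does not hit its incomingHeartbeatTimeout.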
+ go func() { + tick := time.NewTicker(s.outgoingHeartbeatInterval) + defer tick.Stop() + + for { + select { + case <-streamReq.Stream.Context().Done(): + return + + case <-tick.C: + } + + heartbeat := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Heartbeat_{ + Heartbeat: &pbpeerstream.ReplicationMessage_Heartbeat{}, + }, + } + if err := streamSend(heartbeat); err != nil { + logger.Warn("error sending heartbeat", "err", err) + } + } + }() + + // incomingHeartbeatCtx will complete if incoming heartbeats time out. + incomingHeartbeatCtx, incomingHeartbeatCtxCancel := + context.WithTimeout(context.Background(), s.incomingHeartbeatTimeout) + // NOTE: It's important that we wrap the call to cancel in a wrapper func because during the loop we're + // re-assigning the value of incomingHeartbeatCtxCancel and we want the defer to run on the last assigned + // value, not the current value. + defer func() { + incomingHeartbeatCtxCancel() + }() + for { select { // When the doneCh is closed that means that the peering was deleted locally. @@ -244,10 +319,10 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { Terminated: &pbpeerstream.ReplicationMessage_Terminated{}, }, } - logTraceSend(logger, term) - - if err := streamReq.Stream.Send(term); err != nil { - status.TrackSendError(err.Error()) + if err := streamSend(term); err != nil { + // Nolint directive needed due to bug in govet that doesn't see that the cancel + // func of the incomingHeartbeatTimer _does_ get called. + //nolint:govet return fmt.Errorf("failed to send to stream: %v", err) } @@ -256,10 +331,20 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { return nil + // We haven't received a heartbeat within the expected interval. Kill the stream. + case <-incomingHeartbeatCtx.Done(): + logger.Error("ending stream due to heartbeat timeout") + return fmt.Errorf("heartbeat timeout") + case msg, open := <-recvChan: if !open { - logger.Trace("no longer receiving data on the stream") - return nil + // The only time we expect the stream to end is when we've received a "Terminated" message. + // We handle the case of receiving the Terminated message below and then this function exits. + // So if the channel is closed while this function is still running then we haven't received a Terminated + // message which means we want to try and reestablish the stream. + // It's the responsibility of the caller of this function to reestablish the stream on error and so that's + // why we return an error here. + return fmt.Errorf("stream ended unexpectedly") } // NOTE: this code should have similar error handling to the @@ -284,17 +369,86 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { if !pbpeerstream.KnownTypeURL(req.ResourceURL) { return grpcstatus.Errorf(codes.InvalidArgument, "subscription request to unknown resource URL: %s", req.ResourceURL) } - switch { - case req.ResponseNonce == "": - // TODO(peering): This can happen on a client peer since they don't try to receive subscriptions before entering HandleStream. - // Should change that behavior or only allow it that one time. - case req.Error != nil && (req.Error.Code != int32(code.Code_OK) || req.Error.Message != ""): + // There are different formats of requests depending upon where in the stream lifecycle we are. + // + // 1. Initial Request: This is the first request being received + // FROM the establishing peer. 
This is handled specially in + // (*Server).StreamResources BEFORE calling + // (*Server).HandleStream. This takes care of determining what + // the PeerID is for the stream. This is ALSO treated as (2) below. + // + // 2. Subscription Request: This is the first request for a + // given ResourceURL within a stream. The Initial Request (1) + // is always one of these as well. + // + // These must contain a valid ResourceURL with no Error or + // ResponseNonce set. + // + // It is valid to subscribe to the same ResourceURL twice + // within the lifetime of a stream, but all duplicate + // subscriptions are treated as no-ops upon receipt. + // + // 3. ACK Request: This is the message sent in reaction to an + // earlier Response to indicate that the response was processed + // by the other side successfully. + // + // These must contain a ResponseNonce and no Error. + // + // 4. NACK Request: This is the message sent in reaction to an + // earlier Response to indicate that the response was NOT + // processed by the other side successfully. + // + // These must contain a ResponseNonce and an Error. + // + if !remoteSubTracker.IsSubscribed(req.ResourceURL) { + // This must be a new subscription request to add a new + // resource type, vet it like a new request. + + if !streamReq.WasDialed() { + if req.PeerID != "" && req.PeerID != streamReq.RemoteID { + // Not necessary after the first request from the dialer, + // but if provided must match. + return grpcstatus.Errorf(codes.InvalidArgument, + "initial subscription requests for a resource type must have consistent PeerID values: got=%q expected=%q", + req.PeerID, + streamReq.RemoteID, + ) + } + } + if req.ResponseNonce != "" { + return grpcstatus.Error(codes.InvalidArgument, "initial subscription requests for a resource type must not contain a nonce") + } + if req.Error != nil { + return grpcstatus.Error(codes.InvalidArgument, "initial subscription request for a resource type must not contain an error") + } + + if remoteSubTracker.Subscribe(req.ResourceURL) { + logger.Info("subscribing to resource type", "resourceURL", req.ResourceURL) + } + status.TrackAck() + continue + } + + // At this point we have a valid ResourceURL and we are subscribed to it. + + switch { + case req.ResponseNonce == "" && req.Error != nil: + return grpcstatus.Error(codes.InvalidArgument, "initial subscription request for a resource type must not contain an error") + + case req.ResponseNonce != "" && req.Error == nil: // ACK + // TODO(peering): handle ACK fully + status.TrackAck() + + case req.ResponseNonce != "" && req.Error != nil: // NACK + // TODO(peering): handle NACK fully logger.Warn("client peer was unable to apply resource", "code", req.Error.Code, "error", req.Error.Message) status.TrackNack(fmt.Sprintf("client peer was unable to apply resource: %s", req.Error.Message)) default: - status.TrackAck() + // This branch might be dead code, but it could also happen + // during a stray 're-subscribe' so just ignore the + // message. 
} continue @@ -302,17 +456,15 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { if resp := msg.GetResponse(); resp != nil { // TODO(peering): Ensure there's a nonce - reply, err := s.processResponse(streamReq.PeerName, streamReq.Partition, resp) + reply, err := s.processResponse(streamReq.PeerName, streamReq.Partition, status, resp, logger) if err != nil { logger.Error("failed to persist resource", "resourceURL", resp.ResourceURL, "resourceID", resp.ResourceID) - status.TrackReceiveError(err.Error()) + status.TrackRecvError(err.Error()) } else { - status.TrackReceiveSuccess() + status.TrackRecvResourceSuccess() } - logTraceSend(logger, reply) - if err := streamReq.Stream.Send(reply); err != nil { - status.TrackSendError(err.Error()) + if err := streamSend(reply); err != nil { return fmt.Errorf("failed to send to stream: %v", err) } @@ -329,11 +481,27 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { return nil } + if msg.GetHeartbeat() != nil { + status.TrackRecvHeartbeat() + + // Reset the heartbeat timeout by creating a new context. + // We first must cancel the old context so there's no leaks. This is safe to do because we're only + // reading that context within this for{} loop, and so we won't accidentally trigger the heartbeat + // timeout. + incomingHeartbeatCtxCancel() + // NOTE: IDEs and govet think that the reassigned cancel below never gets + // called, but it does by the defer when the heartbeat ctx is first created. + // They just can't trace the execution properly for some reason (possibly golang/go#29587). + //nolint:govet + incomingHeartbeatCtx, incomingHeartbeatCtxCancel = + context.WithTimeout(context.Background(), s.incomingHeartbeatTimeout) + } + case update := <-subCh: var resp *pbpeerstream.ReplicationMessage_Response switch { case strings.HasPrefix(update.CorrelationID, subExportedService): - resp, err = makeServiceResponse(logger, update) + resp, err = makeServiceResponse(logger, status, update) if err != nil { // Log the error and skip this response to avoid locking up peering due to a bad update event. logger.Error("failed to create service response", "error", err) @@ -360,10 +528,7 @@ func (s *Server) HandleStream(streamReq HandleStreamRequest) error { } replResp := makeReplicationResponse(resp) - - logTraceSend(logger, replResp) - if err := streamReq.Stream.Send(replResp); err != nil { - status.TrackSendError(err.Error()) + if err := streamSend(replResp); err != nil { return fmt.Errorf("failed to push data for %q: %w", update.CorrelationID, err) } } @@ -383,8 +548,8 @@ func getTrustDomain(store StateStore, logger hclog.Logger) (string, error) { return connect.SpiffeIDSigningForCluster(cfg.ClusterID).Host(), nil } -func (s *Server) StreamStatus(peer string) (resp Status, found bool) { - return s.Tracker.StreamStatus(peer) +func (s *Server) StreamStatus(peerID string) (resp Status, found bool) { + return s.Tracker.StreamStatus(peerID) } // ConnectedStreams returns a map of connected stream IDs to the corresponding channel for tearing them down. @@ -420,3 +585,63 @@ func logTraceProto(logger hclog.Logger, pb proto.Message, received bool) { logger.Trace("replication message", "direction", dir, "protobuf", out) } + +// resourceSubscriptionTracker is used to keep track of the ResourceURLs that a +// stream has subscribed to and can notify you when a subscription comes in by +// closing the channels returned by SubscribedChan. 
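+//
+// A rough usage sketch (illustrative only, using the methods defined below):
+//
+//	tracker := newResourceSubscriptionTracker()
+//	ready := tracker.SubscribedChan(pbpeerstream.TypeURLExportedService)
+//	if tracker.Subscribe(pbpeerstream.TypeURLExportedService) {
+//		<-ready // already closed by Subscribe, so this never blocks
+//	}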
+type resourceSubscriptionTracker struct {
+	// notifierMap keeps track of a notification channel for each resourceURL.
+	// Keys may exist in here even when they do not exist in 'subscribed' because
+	// calling SubscribedChan may need to create and hand out a notification
+	// channel in advance of any notification.
+	notifierMap map[string]chan struct{}
+
+	// subscribed is a set that keeps track of resourceURLs that are currently
+	// subscribed to. Keys are never deleted. If a key is present in this map
+	// it is also present in 'notifierMap'.
+	subscribed map[string]struct{}
+}
+
+func newResourceSubscriptionTracker() *resourceSubscriptionTracker {
+	return &resourceSubscriptionTracker{
+		subscribed:  make(map[string]struct{}),
+		notifierMap: make(map[string]chan struct{}),
+	}
+}
+
+// IsSubscribed returns true if the given ResourceURL has an active subscription.
+func (t *resourceSubscriptionTracker) IsSubscribed(resourceURL string) bool {
+	_, ok := t.subscribed[resourceURL]
+	return ok
+}
+
+// Subscribe subscribes to the given ResourceURL. It will return true if this
+// was the FIRST time a subscription occurred. It will also close the
+// notification channel associated with this ResourceURL.
+func (t *resourceSubscriptionTracker) Subscribe(resourceURL string) bool {
+	if _, ok := t.subscribed[resourceURL]; ok {
+		return false
+	}
+	t.subscribed[resourceURL] = struct{}{}
+
+	// and notify
+	ch := t.ensureNotifierChan(resourceURL)
+	close(ch)
+
+	return true
+}
+
+// SubscribedChan returns a channel that will be closed when the ResourceURL is
+// subscribed using the Subscribe method.
+func (t *resourceSubscriptionTracker) SubscribedChan(resourceURL string) <-chan struct{} {
+	return t.ensureNotifierChan(resourceURL)
+}
+
+func (t *resourceSubscriptionTracker) ensureNotifierChan(resourceURL string) chan struct{} {
+	if ch, ok := t.notifierMap[resourceURL]; ok {
+		return ch
+	}
+	ch := make(chan struct{})
+	t.notifierMap[resourceURL] = ch
+	return ch
+}
diff --git a/agent/grpc-external/services/peerstream/stream_test.go b/agent/grpc-external/services/peerstream/stream_test.go
index de1455a63..174ecf59f 100644
--- a/agent/grpc-external/services/peerstream/stream_test.go
+++ b/agent/grpc-external/services/peerstream/stream_test.go
@@ -12,17 +12,17 @@ import (
 	"testing"
 	"time"
 
-	"github.com/golang/protobuf/proto"
-	"github.com/golang/protobuf/ptypes"
-	"github.com/golang/protobuf/ptypes/any"
 	"github.com/hashicorp/go-uuid"
 	"github.com/stretchr/testify/require"
 	"google.golang.org/genproto/googleapis/rpc/code"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/known/anypb"
 
 	"github.com/hashicorp/consul/acl"
+	"github.com/hashicorp/consul/agent/cache"
 	"github.com/hashicorp/consul/agent/connect"
 	"github.com/hashicorp/consul/agent/consul/state"
 	"github.com/hashicorp/consul/agent/consul/stream"
@@ -97,18 +97,6 @@ func TestStreamResources_Server_LeaderBecomesFollower(t *testing.T) {
 		backend.leaderAddr = "expected:address"
 	})
 
-	client := NewMockClient(context.Background())
-
-	errCh := make(chan error, 1)
-	client.ErrCh = errCh
-
-	go func() {
-		err := srv.StreamResources(client.ReplicationStream)
-		if err != nil {
-			errCh <- err
-		}
-	}()
-
 	p := writePeeringToBeDialed(t, store, 1, "my-peer")
 	require.Empty(t, p.PeerID, "should be empty if being dialed")
 	peerID := p.ID
@@ -116,53 +104,73 @@ func TestStreamResources_Server_LeaderBecomesFollower(t *testing.T) {
 	// Set the initial
roots and CA configuration. _, _ = writeInitialRootsAndCA(t, store) - // Receive a subscription from a peer - sub := &pbpeerstream.ReplicationMessage{ - Payload: &pbpeerstream.ReplicationMessage_Request_{ - Request: &pbpeerstream.ReplicationMessage_Request{ - PeerID: peerID, - ResourceURL: pbpeerstream.TypeURLService, + client := NewMockClient(context.Background()) + + errCh := make(chan error, 1) + client.ErrCh = errCh + + go func() { + // Pass errors from server handler into ErrCh so that they can be seen by the client on Recv(). + // This matches gRPC's behavior when an error is returned by a server. + if err := srv.StreamResources(client.ReplicationStream); err != nil { + errCh <- err + } + }() + + // Receive a subscription from a peer. This message arrives while the + // server is a leader and should work. + testutil.RunStep(t, "send subscription request to leader and consume its two requests", func(t *testing.T) { + sub := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Request_{ + Request: &pbpeerstream.ReplicationMessage_Request{ + PeerID: peerID, + ResourceURL: pbpeerstream.TypeURLExportedService, + }, }, - }, - } - err := client.Send(sub) - require.NoError(t, err) + } + err := client.Send(sub) + require.NoError(t, err) - msg, err := client.Recv() - require.NoError(t, err) - require.NotEmpty(t, msg) + msg1, err := client.Recv() + require.NoError(t, err) + require.NotEmpty(t, msg1) - receiveRoots, err := client.Recv() - require.NoError(t, err) - require.NotNil(t, receiveRoots.GetResponse()) - require.Equal(t, pbpeerstream.TypeURLRoots, receiveRoots.GetResponse().ResourceURL) + msg2, err := client.Recv() + require.NoError(t, err) + require.NotEmpty(t, msg2) + }) - input2 := &pbpeerstream.ReplicationMessage{ - Payload: &pbpeerstream.ReplicationMessage_Request_{ - Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, - ResponseNonce: "1", + // The ACK will be a new request but at this point the server is not the + // leader in the test and this should fail. 
+ testutil.RunStep(t, "ack fails with non leader", func(t *testing.T) { + ack := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Request_{ + Request: &pbpeerstream.ReplicationMessage_Request{ + ResourceURL: pbpeerstream.TypeURLExportedService, + ResponseNonce: "1", + }, }, - }, - } + } - err2 := client.Send(input2) - require.NoError(t, err2) + err := client.Send(ack) + require.NoError(t, err) - // expect error - msg2, err2 := client.Recv() - require.Nil(t, msg2) - require.Error(t, err2) - require.EqualError(t, err2, "rpc error: code = FailedPrecondition desc = node is not a leader anymore; cannot continue streaming") + // expect error + msg, err := client.Recv() + require.Nil(t, msg) + require.Error(t, err) + require.EqualError(t, err, "rpc error: code = FailedPrecondition desc = node is not a leader anymore; cannot continue streaming") - // expect a status error - st, ok := status.FromError(err2) - require.True(t, ok, "need to get back a grpc status error") - deets := st.Details() + // expect a status error + st, ok := status.FromError(err) + require.True(t, ok, "need to get back a grpc status error") - // expect a LeaderAddress message - exp := []interface{}{&pbpeerstream.LeaderAddress{Address: "expected:address"}} - prototest.AssertDeepEqual(t, exp, deets) + // expect a LeaderAddress message + expect := []interface{}{ + &pbpeerstream.LeaderAddress{Address: "expected:address"}, + } + prototest.AssertDeepEqual(t, expect, st.Details()) + }) } func TestStreamResources_Server_FirstRequest(t *testing.T) { @@ -204,7 +212,7 @@ func TestStreamResources_Server_FirstRequest(t *testing.T) { input: &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Response_{ Response: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResourceID: "api-service", Nonce: "2", }, @@ -251,7 +259,7 @@ func TestStreamResources_Server_FirstRequest(t *testing.T) { Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ PeerID: "63b60245-c475-426b-b314-4588d210859d", - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, }, }, }, @@ -264,7 +272,6 @@ func TestStreamResources_Server_FirstRequest(t *testing.T) { run(t, tc) }) } - } func TestStreamResources_Server_Terminate(t *testing.T) { @@ -291,7 +298,7 @@ func TestStreamResources_Server_Terminate(t *testing.T) { receiveRoots, err := client.Recv() require.NoError(t, err) require.NotNil(t, receiveRoots.GetResponse()) - require.Equal(t, pbpeerstream.TypeURLRoots, receiveRoots.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, receiveRoots.GetResponse().ResourceURL) testutil.RunStep(t, "new stream gets tracked", func(t *testing.T) { retry.Run(t, func(r *retry.R) { @@ -347,7 +354,6 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { }) }) - var sequence uint64 var lastSendSuccess time.Time testutil.RunStep(t, "ack tracked as success", func(t *testing.T) { @@ -355,18 +361,17 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ PeerID: peerID, - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "1", // Acks do not have an Error populated in the request }, }, } + + lastSendSuccess = it.FutureNow(1) err := client.Send(ack) 
require.NoError(t, err) - sequence++ - - lastSendSuccess = it.base.Add(time.Duration(sequence) * time.Second).UTC() expect := Status{ Connected: true, @@ -388,7 +393,7 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ PeerID: peerID, - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "2", Error: &pbstatus.Status{ Code: int32(code.Code_UNAVAILABLE), @@ -397,12 +402,12 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { }, }, } + + lastNack = it.FutureNow(1) err := client.Send(nack) require.NoError(t, err) - sequence++ lastNackMsg = "client peer was unable to apply resource: bad bad not good" - lastNack = it.base.Add(time.Duration(sequence) * time.Second).UTC() expect := Status{ Connected: true, @@ -418,28 +423,28 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { }) }) - var lastRecvSuccess time.Time + var lastRecvResourceSuccess time.Time testutil.RunStep(t, "response applied locally", func(t *testing.T) { resp := &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Response_{ Response: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResourceID: "api", Nonce: "21", Operation: pbpeerstream.Operation_OPERATION_UPSERT, - Resource: makeAnyPB(t, &pbservice.IndexedCheckServiceNodes{}), + Resource: makeAnyPB(t, &pbpeerstream.ExportedService{}), }, }, } + lastRecvResourceSuccess = it.FutureNow(1) err := client.Send(resp) require.NoError(t, err) - sequence++ expectRoots := &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Response_{ Response: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLRoots, + ResourceURL: pbpeerstream.TypeURLPeeringTrustBundle, ResourceID: "roots", Resource: makeAnyPB(t, &pbpeering.PeeringTrustBundle{ TrustDomain: connect.TestTrustDomain, @@ -460,21 +465,24 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { expectAck := &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "21", }, }, } prototest.AssertDeepEqual(t, expectAck, ack) - lastRecvSuccess = it.base.Add(time.Duration(sequence) * time.Second).UTC() + api := structs.NewServiceName("api", nil) expect := Status{ - Connected: true, - LastAck: lastSendSuccess, - LastNack: lastNack, - LastNackMessage: lastNackMsg, - LastReceiveSuccess: lastRecvSuccess, + Connected: true, + LastAck: lastSendSuccess, + LastNack: lastNack, + LastNackMessage: lastNackMsg, + LastRecvResourceSuccess: lastRecvResourceSuccess, + ImportedServices: map[string]struct{}{ + api.String(): {}, + }, } retry.Run(t, func(r *retry.R) { @@ -491,7 +499,7 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { resp := &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Response_{ Response: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResourceID: "web", Nonce: "24", @@ -500,9 +508,9 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { }, }, } + lastRecvError = it.FutureNow(1) err := client.Send(resp) require.NoError(t, err) - sequence++ ack, err := 
client.Recv() require.NoError(t, err) @@ -510,7 +518,7 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { expectNack := &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "24", Error: &pbstatus.Status{ Code: int32(code.Code_INVALID_ARGUMENT), @@ -521,17 +529,54 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { } prototest.AssertDeepEqual(t, expectNack, ack) - lastRecvError = it.base.Add(time.Duration(sequence) * time.Second).UTC() lastRecvErrorMsg = `unsupported operation: "OPERATION_UNSPECIFIED"` + api := structs.NewServiceName("api", nil) + expect := Status{ Connected: true, LastAck: lastSendSuccess, LastNack: lastNack, LastNackMessage: lastNackMsg, - LastReceiveSuccess: lastRecvSuccess, - LastReceiveError: lastRecvError, - LastReceiveErrorMessage: lastRecvErrorMsg, + LastRecvResourceSuccess: lastRecvResourceSuccess, + LastRecvError: lastRecvError, + LastRecvErrorMessage: lastRecvErrorMsg, + ImportedServices: map[string]struct{}{ + api.String(): {}, + }, + } + + retry.Run(t, func(r *retry.R) { + status, ok := srv.StreamStatus(peerID) + require.True(r, ok) + require.Equal(r, expect, status) + }) + }) + + var lastRecvHeartbeat time.Time + testutil.RunStep(t, "receives heartbeat", func(t *testing.T) { + resp := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Heartbeat_{ + Heartbeat: &pbpeerstream.ReplicationMessage_Heartbeat{}, + }, + } + lastRecvHeartbeat = it.FutureNow(1) + err := client.Send(resp) + require.NoError(t, err) + api := structs.NewServiceName("api", nil) + + expect := Status{ + Connected: true, + LastAck: lastSendSuccess, + LastNack: lastNack, + LastNackMessage: lastNackMsg, + LastRecvResourceSuccess: lastRecvResourceSuccess, + LastRecvError: lastRecvError, + LastRecvErrorMessage: lastRecvErrorMsg, + LastRecvHeartbeat: lastRecvHeartbeat, + ImportedServices: map[string]struct{}{ + api.String(): {}, + }, } retry.Run(t, func(r *retry.R) { @@ -542,23 +587,28 @@ func TestStreamResources_Server_StreamTracker(t *testing.T) { }) testutil.RunStep(t, "client disconnect marks stream as disconnected", func(t *testing.T) { + lastRecvError = it.FutureNow(1) + disconnectTime := it.FutureNow(2) + lastRecvErrorMsg = io.EOF.Error() + client.Close() - sequence++ - lastRecvError := it.base.Add(time.Duration(sequence) * time.Second).UTC() - - sequence++ - disconnectTime := it.base.Add(time.Duration(sequence) * time.Second).UTC() + api := structs.NewServiceName("api", nil) expect := Status{ Connected: false, + DisconnectErrorMessage: "stream ended unexpectedly", LastAck: lastSendSuccess, LastNack: lastNack, LastNackMessage: lastNackMsg, DisconnectTime: disconnectTime, - LastReceiveSuccess: lastRecvSuccess, - LastReceiveErrorMessage: io.EOF.Error(), - LastReceiveError: lastRecvError, + LastRecvResourceSuccess: lastRecvResourceSuccess, + LastRecvError: lastRecvError, + LastRecvErrorMessage: lastRecvErrorMsg, + LastRecvHeartbeat: lastRecvHeartbeat, + ImportedServices: map[string]struct{}{ + api.String(): {}, + }, } retry.Run(t, func(r *retry.R) { @@ -639,35 +689,35 @@ func TestStreamResources_Server_ServiceUpdates(t *testing.T) { expectReplEvents(t, client, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLRoots, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, 
msg.GetResponse().ResourceURL) // Roots tested in TestStreamResources_Server_CARootUpdates }, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { // no mongo instances exist - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mongoSN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_DELETE, msg.GetResponse().Operation) require.Nil(t, msg.GetResponse().Resource) }, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { // proxies can't export because no mesh gateway exists yet - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mongoProxySN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_DELETE, msg.GetResponse().Operation) require.Nil(t, msg.GetResponse().Resource) }, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mysqlSN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) - var nodes pbservice.IndexedCheckServiceNodes - require.NoError(t, ptypes.UnmarshalAny(msg.GetResponse().Resource, &nodes)) + var nodes pbpeerstream.ExportedService + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&nodes)) require.Len(t, nodes.Nodes, 1) }, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { // proxies can't export because no mesh gateway exists yet - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mysqlProxySN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_DELETE, msg.GetResponse().Operation) require.Nil(t, msg.GetResponse().Resource) @@ -689,12 +739,12 @@ func TestStreamResources_Server_ServiceUpdates(t *testing.T) { expectReplEvents(t, client, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mongoProxySN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) - var nodes pbservice.IndexedCheckServiceNodes - require.NoError(t, ptypes.UnmarshalAny(msg.GetResponse().Resource, &nodes)) + var nodes pbpeerstream.ExportedService + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&nodes)) require.Len(t, nodes.Nodes, 1) pm := nodes.Nodes[0].Service.Connect.PeerMeta @@ -706,12 +756,12 @@ func TestStreamResources_Server_ServiceUpdates(t *testing.T) { require.Equal(t, spiffeIDs, pm.SpiffeID) }, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLService, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLExportedService, msg.GetResponse().ResourceURL) require.Equal(t, mysqlProxySN, msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) - var nodes pbservice.IndexedCheckServiceNodes - require.NoError(t, ptypes.UnmarshalAny(msg.GetResponse().Resource, &nodes)) + 
var nodes pbpeerstream.ExportedService + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&nodes)) require.Len(t, nodes.Nodes, 1) pm := nodes.Nodes[0].Service.Connect.PeerMeta @@ -743,8 +793,8 @@ func TestStreamResources_Server_ServiceUpdates(t *testing.T) { require.Equal(r, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) require.Equal(r, mongo.Service.CompoundServiceName().String(), msg.GetResponse().ResourceID) - var nodes pbservice.IndexedCheckServiceNodes - require.NoError(r, ptypes.UnmarshalAny(msg.GetResponse().Resource, &nodes)) + var nodes pbpeerstream.ExportedService + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&nodes)) require.Len(r, nodes.Nodes, 1) }) }) @@ -809,12 +859,12 @@ func TestStreamResources_Server_CARootUpdates(t *testing.T) { testutil.RunStep(t, "initial CA Roots replication", func(t *testing.T) { expectReplEvents(t, client, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLRoots, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, msg.GetResponse().ResourceURL) require.Equal(t, "roots", msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) var trustBundle pbpeering.PeeringTrustBundle - require.NoError(t, ptypes.UnmarshalAny(msg.GetResponse().Resource, &trustBundle)) + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&trustBundle)) require.ElementsMatch(t, []string{rootA.RootCert}, trustBundle.RootPEMs) expect := connect.SpiffeIDSigningForCluster(clusterID).Host() @@ -838,12 +888,12 @@ func TestStreamResources_Server_CARootUpdates(t *testing.T) { expectReplEvents(t, client, func(t *testing.T, msg *pbpeerstream.ReplicationMessage) { - require.Equal(t, pbpeerstream.TypeURLRoots, msg.GetResponse().ResourceURL) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, msg.GetResponse().ResourceURL) require.Equal(t, "roots", msg.GetResponse().ResourceID) require.Equal(t, pbpeerstream.Operation_OPERATION_UPSERT, msg.GetResponse().Operation) var trustBundle pbpeering.PeeringTrustBundle - require.NoError(t, ptypes.UnmarshalAny(msg.GetResponse().Resource, &trustBundle)) + require.NoError(t, msg.GetResponse().Resource.UnmarshalTo(&trustBundle)) require.ElementsMatch(t, []string{rootB.RootCert, rootC.RootCert}, trustBundle.RootPEMs) expect := connect.SpiffeIDSigningForCluster(clusterID).Host() @@ -853,6 +903,200 @@ func TestStreamResources_Server_CARootUpdates(t *testing.T) { }) } +// Test that when the client doesn't send a heartbeat in time, the stream is disconnected. +func TestStreamResources_Server_DisconnectsOnHeartbeatTimeout(t *testing.T) { + it := incrementalTime{ + base: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), + } + + srv, store := newTestServer(t, func(c *Config) { + c.Tracker.SetClock(it.Now) + c.incomingHeartbeatTimeout = 5 * time.Millisecond + }) + + p := writePeeringToBeDialed(t, store, 1, "my-peer") + require.Empty(t, p.PeerID, "should be empty if being dialed") + peerID := p.ID + + // Set the initial roots and CA configuration. + _, _ = writeInitialRootsAndCA(t, store) + + client := makeClient(t, srv, peerID) + + // TODO(peering): test fails if we don't drain the stream with this call because the + // server gets blocked sending the termination message. Figure out a way to let + // messages queue and filter replication messages. 
+ receiveRoots, err := client.Recv() + require.NoError(t, err) + require.NotNil(t, receiveRoots.GetResponse()) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, receiveRoots.GetResponse().ResourceURL) + + testutil.RunStep(t, "new stream gets tracked", func(t *testing.T) { + retry.Run(t, func(r *retry.R) { + status, ok := srv.StreamStatus(peerID) + require.True(r, ok) + require.True(r, status.Connected) + }) + }) + + testutil.RunStep(t, "stream is disconnected due to heartbeat timeout", func(t *testing.T) { + disconnectTime := it.FutureNow(1) + retry.Run(t, func(r *retry.R) { + status, ok := srv.StreamStatus(peerID) + require.True(r, ok) + require.False(r, status.Connected) + require.Equal(r, "heartbeat timeout", status.DisconnectErrorMessage) + require.Equal(r, disconnectTime, status.DisconnectTime) + }) + }) +} + +// Test that the server sends heartbeats at the expected interval. +func TestStreamResources_Server_SendsHeartbeats(t *testing.T) { + it := incrementalTime{ + base: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), + } + outgoingHeartbeatInterval := 5 * time.Millisecond + + srv, store := newTestServer(t, func(c *Config) { + c.Tracker.SetClock(it.Now) + c.outgoingHeartbeatInterval = outgoingHeartbeatInterval + }) + + p := writePeeringToBeDialed(t, store, 1, "my-peer") + require.Empty(t, p.PeerID, "should be empty if being dialed") + peerID := p.ID + + // Set the initial roots and CA configuration. + _, _ = writeInitialRootsAndCA(t, store) + + client := makeClient(t, srv, peerID) + + // TODO(peering): test fails if we don't drain the stream with this call because the + // server gets blocked sending the termination message. Figure out a way to let + // messages queue and filter replication messages. + receiveRoots, err := client.Recv() + require.NoError(t, err) + require.NotNil(t, receiveRoots.GetResponse()) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, receiveRoots.GetResponse().ResourceURL) + + testutil.RunStep(t, "new stream gets tracked", func(t *testing.T) { + retry.Run(t, func(r *retry.R) { + status, ok := srv.StreamStatus(peerID) + require.True(r, ok) + require.True(r, status.Connected) + }) + }) + + testutil.RunStep(t, "sends first heartbeat", func(t *testing.T) { + retry.RunWith(&retry.Timer{ + Timeout: outgoingHeartbeatInterval * 2, + Wait: outgoingHeartbeatInterval / 2, + }, t, func(r *retry.R) { + heartbeat, err := client.Recv() + require.NoError(t, err) + require.NotNil(t, heartbeat.GetHeartbeat()) + }) + }) + + testutil.RunStep(t, "sends second heartbeat", func(t *testing.T) { + retry.RunWith(&retry.Timer{ + Timeout: outgoingHeartbeatInterval * 2, + Wait: outgoingHeartbeatInterval / 2, + }, t, func(r *retry.R) { + heartbeat, err := client.Recv() + require.NoError(t, err) + require.NotNil(t, heartbeat.GetHeartbeat()) + }) + }) +} + +// Test that as long as the server receives heartbeats it keeps the connection open. +func TestStreamResources_Server_KeepsConnectionOpenWithHeartbeat(t *testing.T) { + it := incrementalTime{ + base: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), + } + incomingHeartbeatTimeout := 10 * time.Millisecond + + srv, store := newTestServer(t, func(c *Config) { + c.Tracker.SetClock(it.Now) + c.incomingHeartbeatTimeout = incomingHeartbeatTimeout + }) + + p := writePeeringToBeDialed(t, store, 1, "my-peer") + require.Empty(t, p.PeerID, "should be empty if being dialed") + peerID := p.ID + + // Set the initial roots and CA configuration. 
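These tests drive the clock with `incrementalTime` (each `Now()` call advances one second, so `it.FutureNow(1)` predicts the timestamp recorded at the next tick) and expect a missed heartbeat to surface as `DisconnectErrorMessage: "heartbeat timeout"`. The server's stream loop is not part of this hunk, so the following is only a generic sketch of the watchdog pattern being exercised: reset a timer on every heartbeat and tear the stream down when it fires.

```go
// Illustrative watchdog sketch only; not Consul's actual stream loop.
package main

import (
	"errors"
	"fmt"
	"time"
)

func watchHeartbeats(heartbeats <-chan struct{}, timeout time.Duration) error {
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	for {
		select {
		case _, ok := <-heartbeats:
			if !ok {
				// Sender closed the stream gracefully.
				return nil
			}
			// Every heartbeat pushes the deadline out again.
			if !timer.Stop() {
				<-timer.C
			}
			timer.Reset(timeout)
		case <-timer.C:
			return errors.New("heartbeat timeout")
		}
	}
}

func main() {
	const timeout = 50 * time.Millisecond

	hb := make(chan struct{})
	go func() {
		// Keep-alive side: send right away, then every half timeout,
		// mirroring the goroutine in the keep-alive test; stop after a
		// few beats so the watchdog eventually fires.
		for i := 0; i < 5; i++ {
			hb <- struct{}{}
			time.Sleep(timeout / 2)
		}
	}()

	fmt.Println(watchHeartbeats(hb, timeout)) // heartbeat timeout
}
```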
+ _, _ = writeInitialRootsAndCA(t, store) + + client := makeClient(t, srv, peerID) + + // TODO(peering): test fails if we don't drain the stream with this call because the + // server gets blocked sending the termination message. Figure out a way to let + // messages queue and filter replication messages. + receiveRoots, err := client.Recv() + require.NoError(t, err) + require.NotNil(t, receiveRoots.GetResponse()) + require.Equal(t, pbpeerstream.TypeURLPeeringTrustBundle, receiveRoots.GetResponse().ResourceURL) + + testutil.RunStep(t, "new stream gets tracked", func(t *testing.T) { + retry.Run(t, func(r *retry.R) { + status, ok := srv.StreamStatus(peerID) + require.True(r, ok) + require.True(r, status.Connected) + }) + }) + + heartbeatMsg := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Heartbeat_{ + Heartbeat: &pbpeerstream.ReplicationMessage_Heartbeat{}}} + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + // errCh is used to collect any send errors from within the goroutine. + errCh := make(chan error) + + // Set up a goroutine to send the heartbeat every 1/2 of the timeout. + go func() { + // This is just a do while loop. We want to send the heartbeat right away to start + // because the test setup above takes some time and we might be close to the heartbeat + // timeout already. + for { + err := client.Send(heartbeatMsg) + if err != nil { + select { + case errCh <- err: + case <-ctx.Done(): + } + return + } + select { + case <-time.After(incomingHeartbeatTimeout / 2): + case <-ctx.Done(): + close(errCh) + return + } + } + }() + + // Assert that the stream remains connected for 5 heartbeat timeouts. + require.Never(t, func() bool { + status, ok := srv.StreamStatus(peerID) + if !ok { + return true + } + return !status.Connected + }, incomingHeartbeatTimeout*5, incomingHeartbeatTimeout) + + // Kill the heartbeat sending goroutine and check if it had any errors. + cancel() + err, ok := <-errCh + if ok { + require.NoError(t, err) + } +} + // makeClient sets up a *MockClient with the initial subscription // message handshake. 
func makeClient(t *testing.T, srv pbpeerstream.PeerStreamServiceServer, peerID string) *MockClient { @@ -871,33 +1115,57 @@ func makeClient(t *testing.T, srv pbpeerstream.PeerStreamServiceServer, peerID s } }() - // Issue a services subscription to server - init := &pbpeerstream.ReplicationMessage{ - Payload: &pbpeerstream.ReplicationMessage_Request_{ - Request: &pbpeerstream.ReplicationMessage_Request{ - PeerID: peerID, - ResourceURL: pbpeerstream.TypeURLService, + // Issue a services and roots subscription pair to server + for _, resourceURL := range []string{ + pbpeerstream.TypeURLExportedService, + pbpeerstream.TypeURLPeeringTrustBundle, + } { + init := &pbpeerstream.ReplicationMessage{ + Payload: &pbpeerstream.ReplicationMessage_Request_{ + Request: &pbpeerstream.ReplicationMessage_Request{ + PeerID: peerID, + ResourceURL: resourceURL, + }, }, - }, + } + require.NoError(t, client.Send(init)) } - require.NoError(t, client.Send(init)) - // Receive a services subscription from server - receivedSub, err := client.Recv() + // Receive a services and roots subscription request pair from server + receivedSub1, err := client.Recv() + require.NoError(t, err) + receivedSub2, err := client.Recv() require.NoError(t, err) - expect := &pbpeerstream.ReplicationMessage{ - Payload: &pbpeerstream.ReplicationMessage_Request_{ - Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, - // The PeerID field is only set for the messages coming FROM - // the establishing side and are going to be empty from the - // other side. - PeerID: "", + expect := []*pbpeerstream.ReplicationMessage{ + { + Payload: &pbpeerstream.ReplicationMessage_Request_{ + Request: &pbpeerstream.ReplicationMessage_Request{ + ResourceURL: pbpeerstream.TypeURLExportedService, + // The PeerID field is only set for the messages coming FROM + // the establishing side and are going to be empty from the + // other side. + PeerID: "", + }, + }, + }, + { + Payload: &pbpeerstream.ReplicationMessage_Request_{ + Request: &pbpeerstream.ReplicationMessage_Request{ + ResourceURL: pbpeerstream.TypeURLPeeringTrustBundle, + // The PeerID field is only set for the messages coming FROM + // the establishing side and are going to be empty from the + // other side. 
+ PeerID: "", + }, }, }, } - prototest.AssertDeepEqual(t, expect, receivedSub) + got := []*pbpeerstream.ReplicationMessage{ + receivedSub1, + receivedSub2, + } + prototest.AssertElementsMatch[*pbpeerstream.ReplicationMessage](t, expect, got) return client } @@ -967,7 +1235,57 @@ func (b *testStreamBackend) CatalogDeregister(req *structs.DeregisterRequest) er return nil } +func Test_makeServiceResponse_ExportedServicesCount(t *testing.T) { + peerName := "billing" + peerID := "1fabcd52-1d46-49b0-b1d8-71559aee47f5" + + srv, store := newTestServer(t, nil) + require.NoError(t, store.PeeringWrite(31, &pbpeering.Peering{ + ID: peerID, + Name: peerName}, + )) + + // connect the stream + mst, err := srv.Tracker.Connected(peerID) + require.NoError(t, err) + + testutil.RunStep(t, "simulate an update to export a service", func(t *testing.T) { + update := cache.UpdateEvent{ + CorrelationID: subExportedService + "api", + Result: &pbservice.IndexedCheckServiceNodes{ + Nodes: []*pbservice.CheckServiceNode{ + { + Service: &pbservice.NodeService{ + ID: "api-1", + Service: "api", + PeerName: peerName, + }, + }, + }, + }} + _, err := makeServiceResponse(srv.Logger, mst, update) + require.NoError(t, err) + + require.Equal(t, 1, mst.GetExportedServicesCount()) + }) + + testutil.RunStep(t, "simulate a delete for an exported service", func(t *testing.T) { + update := cache.UpdateEvent{ + CorrelationID: subExportedService + "api", + Result: &pbservice.IndexedCheckServiceNodes{ + Nodes: []*pbservice.CheckServiceNode{}, + }} + _, err := makeServiceResponse(srv.Logger, mst, update) + require.NoError(t, err) + + require.Equal(t, 0, mst.GetExportedServicesCount()) + }) +} + func Test_processResponse_Validation(t *testing.T) { + peerName := "billing" + peerID := "1fabcd52-1d46-49b0-b1d8-71559aee47f5" + type testCase struct { name string in *pbpeerstream.ReplicationMessage_Response @@ -975,10 +1293,18 @@ func Test_processResponse_Validation(t *testing.T) { wantErr bool } - srv, _ := newTestServer(t, nil) + srv, store := newTestServer(t, nil) + require.NoError(t, store.PeeringWrite(31, &pbpeering.Peering{ + ID: peerID, + Name: peerName}, + )) + + // connect the stream + mst, err := srv.Tracker.Connected(peerID) + require.NoError(t, err) run := func(t *testing.T, tc testCase) { - reply, err := srv.processResponse("", "", tc.in) + reply, err := srv.processResponse(peerName, "", mst, tc.in, srv.Logger) if tc.wantErr { require.Error(t, err) } else { @@ -991,16 +1317,16 @@ func Test_processResponse_Validation(t *testing.T) { { name: "valid upsert", in: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResourceID: "api", Nonce: "1", Operation: pbpeerstream.Operation_OPERATION_UPSERT, - Resource: makeAnyPB(t, &pbservice.IndexedCheckServiceNodes{}), + Resource: makeAnyPB(t, &pbpeerstream.ExportedService{}), }, expect: &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "1", }, }, @@ -1010,7 +1336,7 @@ func Test_processResponse_Validation(t *testing.T) { { name: "valid delete", in: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResourceID: "api", Nonce: "1", Operation: pbpeerstream.Operation_OPERATION_DELETE, @@ -1018,7 +1344,7 @@ func 
Test_processResponse_Validation(t *testing.T) { expect: &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "1", }, }, @@ -1049,14 +1375,14 @@ func Test_processResponse_Validation(t *testing.T) { { name: "unknown operation", in: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, Nonce: "1", Operation: pbpeerstream.Operation_OPERATION_UNSPECIFIED, }, expect: &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "1", Error: &pbstatus.Status{ Code: int32(code.Code_INVALID_ARGUMENT), @@ -1070,14 +1396,14 @@ func Test_processResponse_Validation(t *testing.T) { { name: "out of range operation", in: &pbpeerstream.ReplicationMessage_Response{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, Nonce: "1", Operation: pbpeerstream.Operation(100000), }, expect: &pbpeerstream.ReplicationMessage{ Payload: &pbpeerstream.ReplicationMessage_Request_{ Request: &pbpeerstream.ReplicationMessage_Request{ - ResourceURL: pbpeerstream.TypeURLService, + ResourceURL: pbpeerstream.TypeURLExportedService, ResponseNonce: "1", Error: &pbstatus.Status{ Code: int32(code.Code_INVALID_ARGUMENT), @@ -1137,8 +1463,8 @@ func writeInitialRootsAndCA(t *testing.T, store *state.Store) (string, *structs. return clusterID, rootA } -func makeAnyPB(t *testing.T, pb proto.Message) *any.Any { - any, err := ptypes.MarshalAny(pb) +func makeAnyPB(t *testing.T, pb proto.Message) *anypb.Any { + any, err := anypb.New(pb) require.NoError(t, err) return any } @@ -1218,8 +1544,8 @@ func expectReplEvents(t *testing.T, client *MockClient, checkFns ...func(t *test } } -func TestHandleUpdateService(t *testing.T) { - srv, _ := newTestServer(t, func(c *Config) { +func Test_processResponse_handleUpsert_handleDelete(t *testing.T) { + srv, store := newTestServer(t, func(c *Config) { backend := c.Backend.(*testStreamBackend) backend.leader = func() bool { return false @@ -1227,13 +1553,15 @@ func TestHandleUpdateService(t *testing.T) { }) type testCase struct { - name string - seed []*structs.RegisterRequest - input *pbservice.IndexedCheckServiceNodes - expect map[string]structs.CheckServiceNodes + name string + seed []*structs.RegisterRequest + input *pbpeerstream.ExportedService + expect map[string]structs.CheckServiceNodes + expectedImportedServicesCount int } peerName := "billing" + peerID := "1fabcd52-1d46-49b0-b1d8-71559aee47f5" remoteMeta := pbcommon.NewEnterpriseMetaFromStructs(*structs.DefaultEnterpriseMetaInPartition("billing-ap")) // "api" service is imported from the billing-ap partition, corresponding to the billing peer. @@ -1241,14 +1569,43 @@ func TestHandleUpdateService(t *testing.T) { defaultMeta := *acl.DefaultEnterpriseMeta() apiSN := structs.NewServiceName("api", &defaultMeta) + // create a peering in the state store + require.NoError(t, store.PeeringWrite(31, &pbpeering.Peering{ + ID: peerID, + Name: peerName}, + )) + + // connect the stream + mst, err := srv.Tracker.Connected(peerID) + require.NoError(t, err) + run := func(t *testing.T, tc testCase) { // Seed the local catalog with some data to reconcile against. 
+ // and increment the tracker's imported services count for _, reg := range tc.seed { require.NoError(t, srv.Backend.CatalogRegister(reg)) + + mst.TrackImportedService(reg.Service.CompoundServiceName()) + } + + var op pbpeerstream.Operation + if len(tc.input.Nodes) == 0 { + op = pbpeerstream.Operation_OPERATION_DELETE + } else { + op = pbpeerstream.Operation_OPERATION_UPSERT + } + + in := &pbpeerstream.ReplicationMessage_Response{ + ResourceURL: pbpeerstream.TypeURLExportedService, + ResourceID: apiSN.String(), + Nonce: "1", + Operation: op, + Resource: makeAnyPB(t, tc.input), } // Simulate an update arriving for billing/api. - require.NoError(t, srv.handleUpdateService(peerName, acl.DefaultPartitionName, apiSN, tc.input)) + _, err = srv.processResponse(peerName, acl.DefaultPartitionName, mst, in, srv.Logger) + require.NoError(t, err) for svc, expect := range tc.expect { t.Run(svc, func(t *testing.T) { @@ -1257,12 +1614,15 @@ func TestHandleUpdateService(t *testing.T) { requireEqualInstances(t, expect, got) }) } + + // assert the imported services count modifications + require.Equal(t, tc.expectedImportedServicesCount, mst.GetImportedServicesCount()) } tt := []testCase{ { name: "upsert two service instances to the same node", - input: &pbservice.IndexedCheckServiceNodes{ + input: &pbpeerstream.ExportedService{ Nodes: []*pbservice.CheckServiceNode{ { Node: &pbservice.Node{ @@ -1390,10 +1750,11 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 1, }, { name: "upsert two service instances to different nodes", - input: &pbservice.IndexedCheckServiceNodes{ + input: &pbpeerstream.ExportedService{ Nodes: []*pbservice.CheckServiceNode{ { Node: &pbservice.Node{ @@ -1521,6 +1882,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 1, }, { name: "receiving a nil input leads to deleting data in the catalog", @@ -1574,10 +1936,11 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, - input: nil, + input: &pbpeerstream.ExportedService{}, expect: map[string]structs.CheckServiceNodes{ "api": {}, }, + expectedImportedServicesCount: 0, }, { name: "deleting one service name from a node does not delete other service names", @@ -1632,7 +1995,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, // Nil input is for the "api" service. - input: nil, + input: &pbpeerstream.ExportedService{}, expect: map[string]structs.CheckServiceNodes{ "api": {}, // Existing redis service was not affected by deletion. 
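The `expectedImportedServicesCount` assertions above treat the count as the size of a set keyed by service name, not an event counter: tracking the same name twice is idempotent, and a delete removes exactly that name. A minimal stand-in for the `TrackImportedService`/`RemoveImportedService`/`GetImportedServicesCount` helpers (the real, mutex-guarded implementations appear in the `stream_tracker.go` hunk further down):

```go
// Minimal sketch of set-based service tracking; names here are local to the
// example, not Consul's real types.
package main

import (
	"fmt"
	"sync"
)

type serviceSet struct {
	mu    sync.Mutex
	names map[string]struct{}
}

func (s *serviceSet) Track(name string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.names == nil {
		s.names = make(map[string]struct{})
	}
	s.names[name] = struct{}{}
}

func (s *serviceSet) Remove(name string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.names, name)
}

func (s *serviceSet) Count() int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return len(s.names)
}

func main() {
	var imported serviceSet
	imported.Track("api")
	imported.Track("api") // idempotent: still one service name
	imported.Track("redis")
	fmt.Println(imported.Count()) // 2

	imported.Remove("api")        // e.g. an empty ExportedService payload for "api"
	fmt.Println(imported.Count()) // 1
}
```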
@@ -1668,6 +2031,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 1, }, { name: "service checks are cleaned up when not present in a response", @@ -1697,7 +2061,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, - input: &pbservice.IndexedCheckServiceNodes{ + input: &pbpeerstream.ExportedService{ Nodes: []*pbservice.CheckServiceNode{ { Node: &pbservice.Node{ @@ -1738,6 +2102,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 2, }, { name: "node checks are cleaned up when not present in a response", @@ -1791,7 +2156,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, - input: &pbservice.IndexedCheckServiceNodes{ + input: &pbpeerstream.ExportedService{ Nodes: []*pbservice.CheckServiceNode{ { Node: &pbservice.Node{ @@ -1872,6 +2237,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 2, }, { name: "replacing a service instance on a node cleans up the old instance", @@ -1925,7 +2291,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, - input: &pbservice.IndexedCheckServiceNodes{ + input: &pbpeerstream.ExportedService{ Nodes: []*pbservice.CheckServiceNode{ { Node: &pbservice.Node{ @@ -2019,6 +2385,7 @@ func TestHandleUpdateService(t *testing.T) { }, }, }, + expectedImportedServicesCount: 2, }, } diff --git a/agent/grpc-external/services/peerstream/stream_tracker.go b/agent/grpc-external/services/peerstream/stream_tracker.go index 5ec0f7ebf..f7a451595 100644 --- a/agent/grpc-external/services/peerstream/stream_tracker.go +++ b/agent/grpc-external/services/peerstream/stream_tracker.go @@ -4,9 +4,11 @@ import ( "fmt" "sync" "time" + + "github.com/hashicorp/consul/agent/structs" ) -// Tracker contains a map of (PeerID -> Status). +// Tracker contains a map of (PeerID -> MutableStatus). // As streams are opened and closed we track details about their status. type Tracker struct { mu sync.RWMutex @@ -31,16 +33,37 @@ func (t *Tracker) SetClock(clock func() time.Time) { } } +// Register a stream for a given peer but do not mark it as connected. +func (t *Tracker) Register(id string) (*MutableStatus, error) { + t.mu.Lock() + defer t.mu.Unlock() + status, _, err := t.registerLocked(id, false) + return status, err +} + +func (t *Tracker) registerLocked(id string, initAsConnected bool) (*MutableStatus, bool, error) { + status, ok := t.streams[id] + if !ok { + status = newMutableStatus(t.timeNow, initAsConnected) + t.streams[id] = status + return status, true, nil + } + return status, false, nil +} + // Connected registers a stream for a given peer, and marks it as connected. // It also enforces that there is only one active stream for a peer. func (t *Tracker) Connected(id string) (*MutableStatus, error) { t.mu.Lock() defer t.mu.Unlock() + return t.connectedLocked(id) +} - status, ok := t.streams[id] - if !ok { - status = newMutableStatus(t.timeNow) - t.streams[id] = status +func (t *Tracker) connectedLocked(id string) (*MutableStatus, error) { + status, newlyRegistered, err := t.registerLocked(id, true) + if err != nil { + return nil, err + } else if newlyRegistered { return status, nil } @@ -52,13 +75,23 @@ func (t *Tracker) Connected(id string) (*MutableStatus, error) { return status, nil } -// Disconnected ensures that if a peer id's stream status is tracked, it is marked as disconnected. -func (t *Tracker) Disconnected(id string) { +// DisconnectedGracefully marks the peer id's stream status as disconnected gracefully. 
+func (t *Tracker) DisconnectedGracefully(id string) { t.mu.Lock() defer t.mu.Unlock() if status, ok := t.streams[id]; ok { - status.TrackDisconnected() + status.TrackDisconnectedGracefully() + } +} + +// DisconnectedDueToError marks the peer id's stream status as disconnected due to an error. +func (t *Tracker) DisconnectedDueToError(id string, error string) { + t.mu.Lock() + defer t.mu.Unlock() + + if status, ok := t.streams[id]; ok { + status.TrackDisconnectedDueToError(error) } } @@ -112,6 +145,10 @@ type Status struct { // Connected is true when there is an open stream for the peer. Connected bool + // DisconnectErrorMessage tracks the error that caused the stream to disconnect non-gracefully. + // If the stream is connected or it disconnected gracefully it will be empty. + DisconnectErrorMessage string + // If the status is not connected, DisconnectTime tracks when the stream was closed. Else it's zero. DisconnectTime time.Time @@ -130,24 +167,39 @@ type Status struct { // LastSendErrorMessage tracks the last error message when sending into the stream. LastSendErrorMessage string - // LastReceiveSuccess tracks the time we last successfully stored a resource replicated FROM the peer. - LastReceiveSuccess time.Time + // LastRecvHeartbeat tracks when we last received a heartbeat from our peer. + LastRecvHeartbeat time.Time - // LastReceiveError tracks either: + // LastRecvResourceSuccess tracks the time we last successfully stored a resource replicated FROM the peer. + LastRecvResourceSuccess time.Time + + // LastRecvError tracks either: // - The time we failed to store a resource replicated FROM the peer. // - The time of the last error when receiving from the stream. - LastReceiveError time.Time + LastRecvError time.Time - // LastReceiveError tracks either: - // - The error message when we failed to store a resource replicated FROM the peer. - // - The last error message when receiving from the stream. - LastReceiveErrorMessage string + // LastRecvErrorMessage tracks the last error message when receiving from the stream. + LastRecvErrorMessage string + + // TODO(peering): consider keeping track of imported and exported services thru raft + // ImportedServices keeps track of which service names are imported for the peer + ImportedServices map[string]struct{} + // ExportedServices keeps track of which service names a peer asks to export + ExportedServices map[string]struct{} } -func newMutableStatus(now func() time.Time) *MutableStatus { +func (s *Status) GetImportedServicesCount() uint64 { + return uint64(len(s.ImportedServices)) +} + +func (s *Status) GetExportedServicesCount() uint64 { + return uint64(len(s.ExportedServices)) +} + +func newMutableStatus(now func() time.Time, connected bool) *MutableStatus { return &MutableStatus{ Status: Status{ - Connected: true, + Connected: connected, }, timeNow: now, doneCh: make(chan struct{}), @@ -171,16 +223,24 @@ func (s *MutableStatus) TrackSendError(error string) { s.mu.Unlock() } -func (s *MutableStatus) TrackReceiveSuccess() { +// TrackRecvResourceSuccess tracks receiving a replicated resource. +func (s *MutableStatus) TrackRecvResourceSuccess() { s.mu.Lock() - s.LastReceiveSuccess = s.timeNow().UTC() + s.LastRecvResourceSuccess = s.timeNow().UTC() s.mu.Unlock() } -func (s *MutableStatus) TrackReceiveError(error string) { +// TrackRecvHeartbeat tracks receiving a heartbeat from our peer. 
+func (s *MutableStatus) TrackRecvHeartbeat() { s.mu.Lock() - s.LastReceiveError = s.timeNow().UTC() - s.LastReceiveErrorMessage = error + s.LastRecvHeartbeat = s.timeNow().UTC() + s.mu.Unlock() +} + +func (s *MutableStatus) TrackRecvError(error string) { + s.mu.Lock() + s.LastRecvError = s.timeNow().UTC() + s.LastRecvErrorMessage = error s.mu.Unlock() } @@ -195,13 +255,27 @@ func (s *MutableStatus) TrackConnected() { s.mu.Lock() s.Connected = true s.DisconnectTime = time.Time{} + s.DisconnectErrorMessage = "" s.mu.Unlock() } -func (s *MutableStatus) TrackDisconnected() { +// TrackDisconnectedGracefully tracks when the stream was disconnected in a way we expected. +// For example, we got a terminated message, or we terminated the stream ourselves. +func (s *MutableStatus) TrackDisconnectedGracefully() { s.mu.Lock() s.Connected = false s.DisconnectTime = s.timeNow().UTC() + s.DisconnectErrorMessage = "" + s.mu.Unlock() +} + +// TrackDisconnectedDueToError tracks when the stream was disconnected due to an error. +// For example the heartbeat timed out, or we couldn't send into the stream. +func (s *MutableStatus) TrackDisconnectedDueToError(error string) { + s.mu.Lock() + s.Connected = false + s.DisconnectTime = s.timeNow().UTC() + s.DisconnectErrorMessage = error s.mu.Unlock() } @@ -222,3 +296,53 @@ func (s *MutableStatus) GetStatus() Status { return copy } + +func (s *MutableStatus) RemoveImportedService(sn structs.ServiceName) { + s.mu.Lock() + defer s.mu.Unlock() + + delete(s.ImportedServices, sn.String()) +} + +func (s *MutableStatus) TrackImportedService(sn structs.ServiceName) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.ImportedServices == nil { + s.ImportedServices = make(map[string]struct{}) + } + + s.ImportedServices[sn.String()] = struct{}{} +} + +func (s *MutableStatus) GetImportedServicesCount() int { + s.mu.RLock() + defer s.mu.RUnlock() + + return len(s.ImportedServices) +} + +func (s *MutableStatus) RemoveExportedService(sn structs.ServiceName) { + s.mu.Lock() + defer s.mu.Unlock() + + delete(s.ExportedServices, sn.String()) +} + +func (s *MutableStatus) TrackExportedService(sn structs.ServiceName) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.ExportedServices == nil { + s.ExportedServices = make(map[string]struct{}) + } + + s.ExportedServices[sn.String()] = struct{}{} +} + +func (s *MutableStatus) GetExportedServicesCount() int { + s.mu.RLock() + defer s.mu.RUnlock() + + return len(s.ExportedServices) +} diff --git a/agent/grpc-external/services/peerstream/stream_tracker_test.go b/agent/grpc-external/services/peerstream/stream_tracker_test.go index a698ccc6f..f7a9df321 100644 --- a/agent/grpc-external/services/peerstream/stream_tracker_test.go +++ b/agent/grpc-external/services/peerstream/stream_tracker_test.go @@ -62,7 +62,7 @@ func TestTracker_EnsureConnectedDisconnected(t *testing.T) { }) testutil.RunStep(t, "disconnect", func(t *testing.T) { - tracker.Disconnected(peerID) + tracker.DisconnectedGracefully(peerID) sequence++ expect := Status{ @@ -147,7 +147,7 @@ func TestTracker_connectedStreams(t *testing.T) { require.NoError(t, err) // Mark foo as disconnected to avoid showing it as an active stream - status.TrackDisconnected() + status.TrackDisconnectedGracefully() _, err = s.Connected("bar") require.NoError(t, err) @@ -162,3 +162,61 @@ func TestTracker_connectedStreams(t *testing.T) { }) } } + +func TestMutableStatus_TrackConnected(t *testing.T) { + s := MutableStatus{ + Status: Status{ + Connected: false, + DisconnectTime: time.Now(), + DisconnectErrorMessage: 
"disconnected", + }, + } + s.TrackConnected() + + require.True(t, s.IsConnected()) + require.True(t, s.Connected) + require.Equal(t, time.Time{}, s.DisconnectTime) + require.Empty(t, s.DisconnectErrorMessage) +} + +func TestMutableStatus_TrackDisconnectedGracefully(t *testing.T) { + it := incrementalTime{ + base: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), + } + disconnectTime := it.FutureNow(1) + + s := MutableStatus{ + timeNow: it.Now, + Status: Status{ + Connected: true, + }, + } + + s.TrackDisconnectedGracefully() + + require.False(t, s.IsConnected()) + require.False(t, s.Connected) + require.Equal(t, disconnectTime, s.DisconnectTime) + require.Empty(t, s.DisconnectErrorMessage) +} + +func TestMutableStatus_TrackDisconnectedDueToError(t *testing.T) { + it := incrementalTime{ + base: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), + } + disconnectTime := it.FutureNow(1) + + s := MutableStatus{ + timeNow: it.Now, + Status: Status{ + Connected: true, + }, + } + + s.TrackDisconnectedDueToError("disconnect err") + + require.False(t, s.IsConnected()) + require.False(t, s.Connected) + require.Equal(t, disconnectTime, s.DisconnectTime) + require.Equal(t, "disconnect err", s.DisconnectErrorMessage) +} diff --git a/agent/grpc-external/services/peerstream/subscription_blocking.go b/agent/grpc-external/services/peerstream/subscription_blocking.go index c2720dcdb..d11e03d55 100644 --- a/agent/grpc-external/services/peerstream/subscription_blocking.go +++ b/agent/grpc-external/services/peerstream/subscription_blocking.go @@ -19,6 +19,13 @@ import ( // streaming machinery instead to be cheaper. func (m *subscriptionManager) notifyExportedServicesForPeerID(ctx context.Context, state *subscriptionState, peerID string) { + // Wait until this is subscribed-to. + select { + case <-m.serviceSubReady: + case <-ctx.Done(): + return + } + // syncSubscriptionsAndBlock ensures that the subscriptions to the subscription backend // match the list of services exported to the peer. m.syncViaBlockingQuery(ctx, "exported-services", func(ctx context.Context, store StateStore, ws memdb.WatchSet) (interface{}, error) { @@ -34,6 +41,13 @@ func (m *subscriptionManager) notifyExportedServicesForPeerID(ctx context.Contex // TODO: add a new streaming subscription type to list-by-kind-and-partition since we're getting evictions func (m *subscriptionManager) notifyMeshGatewaysForPartition(ctx context.Context, state *subscriptionState, partition string) { + // Wait until this is subscribed-to. + select { + case <-m.serviceSubReady: + case <-ctx.Done(): + return + } + m.syncViaBlockingQuery(ctx, "mesh-gateways", func(ctx context.Context, store StateStore, ws memdb.WatchSet) (interface{}, error) { // Fetch our current list of all mesh gateways. entMeta := structs.DefaultEnterpriseMetaInPartition(partition) diff --git a/agent/grpc-external/services/peerstream/subscription_manager.go b/agent/grpc-external/services/peerstream/subscription_manager.go index 33726a216..0c69b0338 100644 --- a/agent/grpc-external/services/peerstream/subscription_manager.go +++ b/agent/grpc-external/services/peerstream/subscription_manager.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/proto/pbcommon" "github.com/hashicorp/consul/proto/pbpeering" + "github.com/hashicorp/consul/proto/pbpeerstream" "github.com/hashicorp/consul/proto/pbservice" ) @@ -33,12 +34,14 @@ type SubscriptionBackend interface { // subscriptionManager handlers requests to subscribe to events from an events publisher. 
type subscriptionManager struct { - logger hclog.Logger - config Config - trustDomain string - viewStore MaterializedViewStore - backend SubscriptionBackend - getStore func() StateStore + logger hclog.Logger + config Config + trustDomain string + viewStore MaterializedViewStore + backend SubscriptionBackend + getStore func() StateStore + serviceSubReady <-chan struct{} + trustBundlesSubReady <-chan struct{} } // TODO(peering): Maybe centralize so that there is a single manager per datacenter, rather than per peering. @@ -49,18 +52,21 @@ func newSubscriptionManager( trustDomain string, backend SubscriptionBackend, getStore func() StateStore, + remoteSubTracker *resourceSubscriptionTracker, ) *subscriptionManager { logger = logger.Named("subscriptions") store := submatview.NewStore(logger.Named("viewstore")) go store.Run(ctx) return &subscriptionManager{ - logger: logger, - config: config, - trustDomain: trustDomain, - viewStore: store, - backend: backend, - getStore: getStore, + logger: logger, + config: config, + trustDomain: trustDomain, + viewStore: store, + backend: backend, + getStore: getStore, + serviceSubReady: remoteSubTracker.SubscribedChan(pbpeerstream.TypeURLExportedService), + trustBundlesSubReady: remoteSubTracker.SubscribedChan(pbpeerstream.TypeURLPeeringTrustBundle), } } @@ -297,6 +303,13 @@ func (m *subscriptionManager) notifyRootCAUpdatesForPartition( updateCh chan<- cache.UpdateEvent, partition string, ) { + // Wait until this is subscribed-to. + select { + case <-m.trustBundlesSubReady: + case <-ctx.Done(): + return + } + var idx uint64 // TODO(peering): retry logic; fail past a threshold for { diff --git a/agent/grpc-external/services/peerstream/subscription_manager_test.go b/agent/grpc-external/services/peerstream/subscription_manager_test.go index cd12b2c22..1a5269817 100644 --- a/agent/grpc-external/services/peerstream/subscription_manager_test.go +++ b/agent/grpc-external/services/peerstream/subscription_manager_test.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/proto/pbcommon" "github.com/hashicorp/consul/proto/pbpeering" + "github.com/hashicorp/consul/proto/pbpeerstream" "github.com/hashicorp/consul/proto/pbservice" "github.com/hashicorp/consul/proto/prototest" "github.com/hashicorp/consul/sdk/testutil" @@ -32,12 +33,16 @@ func TestSubscriptionManager_RegisterDeregister(t *testing.T) { _, id := backend.ensurePeering(t, "my-peering") partition := acl.DefaultEnterpriseMeta().PartitionOrEmpty() + // Only configure a tracker for catalog events. + tracker := newResourceSubscriptionTracker() + tracker.Subscribe(pbpeerstream.TypeURLExportedService) + mgr := newSubscriptionManager(ctx, testutil.Logger(t), Config{ Datacenter: "dc1", ConnectEnabled: true, }, connect.TestTrustDomain, backend, func() StateStore { return backend.store - }) + }, tracker) subCh := mgr.subscribe(ctx, id, "my-peering", partition) var ( @@ -442,12 +447,16 @@ func TestSubscriptionManager_InitialSnapshot(t *testing.T) { _, id := backend.ensurePeering(t, "my-peering") partition := acl.DefaultEnterpriseMeta().PartitionOrEmpty() + // Only configure a tracker for catalog events. 
+ tracker := newResourceSubscriptionTracker() + tracker.Subscribe(pbpeerstream.TypeURLExportedService) + mgr := newSubscriptionManager(ctx, testutil.Logger(t), Config{ Datacenter: "dc1", ConnectEnabled: true, }, connect.TestTrustDomain, backend, func() StateStore { return backend.store - }) + }, tracker) subCh := mgr.subscribe(ctx, id, "my-peering", partition) // Register two services that are not yet exported @@ -571,21 +580,21 @@ func TestSubscriptionManager_CARoots(t *testing.T) { _, id := backend.ensurePeering(t, "my-peering") partition := acl.DefaultEnterpriseMeta().PartitionOrEmpty() + // Only configure a tracker for CA roots events. + tracker := newResourceSubscriptionTracker() + tracker.Subscribe(pbpeerstream.TypeURLPeeringTrustBundle) + mgr := newSubscriptionManager(ctx, testutil.Logger(t), Config{ Datacenter: "dc1", ConnectEnabled: true, }, connect.TestTrustDomain, backend, func() StateStore { return backend.store - }) + }, tracker) subCh := mgr.subscribe(ctx, id, "my-peering", partition) testutil.RunStep(t, "initial events contain trust bundle", func(t *testing.T) { // events are ordered so we can expect a deterministic list expectEvents(t, subCh, - func(t *testing.T, got cache.UpdateEvent) { - // mesh-gateway assertions are done in other tests - require.Equal(t, subMeshGateway+partition, got.CorrelationID) - }, func(t *testing.T, got cache.UpdateEvent) { require.Equal(t, subCARoot, got.CorrelationID) roots, ok := got.Result.(*pbpeering.PeeringTrustBundle) diff --git a/agent/grpc-external/services/peerstream/testing.go b/agent/grpc-external/services/peerstream/testing.go index 939c38dfa..1f85b2b78 100644 --- a/agent/grpc-external/services/peerstream/testing.go +++ b/agent/grpc-external/services/peerstream/testing.go @@ -2,6 +2,7 @@ package peerstream import ( "context" + "fmt" "io" "sync" "time" @@ -24,14 +25,7 @@ func (c *MockClient) Send(r *pbpeerstream.ReplicationMessage) error { } func (c *MockClient) Recv() (*pbpeerstream.ReplicationMessage, error) { - select { - case err := <-c.ErrCh: - return nil, err - case r := <-c.ReplicationStream.sendCh: - return r, nil - case <-time.After(10 * time.Millisecond): - return nil, io.EOF - } + return c.RecvWithTimeout(10 * time.Millisecond) } func (c *MockClient) RecvWithTimeout(dur time.Duration) (*pbpeerstream.ReplicationMessage, error) { @@ -61,7 +55,6 @@ type MockStream struct { recvCh chan *pbpeerstream.ReplicationMessage ctx context.Context - mu sync.Mutex } var _ pbpeerstream.PeerStreamService_StreamResourcesServer = (*MockStream)(nil) @@ -117,12 +110,37 @@ func (s *MockStream) SendHeader(metadata.MD) error { // SetTrailer implements grpc.ServerStream func (s *MockStream) SetTrailer(metadata.MD) {} +// incrementalTime is an artificial clock used during testing. For those +// scenarios you would pass around the method pointer for `Now` in places where +// you would be using `time.Now`. type incrementalTime struct { base time.Time next uint64 + mu sync.Mutex } +// Now advances the internal clock by 1 second and returns that value. func (t *incrementalTime) Now() time.Time { + t.mu.Lock() + defer t.mu.Unlock() t.next++ - return t.base.Add(time.Duration(t.next) * time.Second) + + dur := time.Duration(t.next) * time.Second + + return t.base.Add(dur) +} + +// FutureNow will return a given future value of the Now() function. +// The numerical argument indicates which future Now value you wanted. The +// value must be > 0. 
+func (t *incrementalTime) FutureNow(n int) time.Time { + if n < 1 { + panic(fmt.Sprintf("argument must be > 1 but was %d", n)) + } + t.mu.Lock() + defer t.mu.Unlock() + + dur := time.Duration(t.next+uint64(n)) * time.Second + + return t.base.Add(dur) } diff --git a/agent/local/state.go b/agent/local/state.go index 74641a068..7909982db 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -256,15 +256,6 @@ func (l *State) aclTokenForServiceSync(id structs.ServiceID, fallback func() str return fallback() } -// AddService is used to add a service entry to the local state. -// This entry is persistent and the agent will make a best effort to -// ensure it is registered -func (l *State) AddService(service *structs.NodeService, token string) error { - l.Lock() - defer l.Unlock() - return l.addServiceLocked(service, token) -} - func (l *State) addServiceLocked(service *structs.NodeService, token string) error { if service == nil { return fmt.Errorf("no service") @@ -293,7 +284,9 @@ func (l *State) addServiceLocked(service *structs.NodeService, token string) err return nil } -// AddServiceWithChecks adds a service and its check tp the local state atomically +// AddServiceWithChecks adds a service entry and its checks to the local state atomically +// This entry is persistent and the agent will make a best effort to +// ensure it is registered func (l *State) AddServiceWithChecks(service *structs.NodeService, checks []*structs.HealthCheck, token string) error { l.Lock() defer l.Unlock() diff --git a/agent/local/state_test.go b/agent/local/state_test.go index 686c86a93..7aa539ea0 100644 --- a/agent/local/state_test.go +++ b/agent/local/state_test.go @@ -64,7 +64,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } assert.False(t, a.State.ServiceExists(structs.ServiceID{ID: srv1.ID})) - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") assert.True(t, a.State.ServiceExists(structs.ServiceID{ID: srv1.ID})) args.Service = srv1 if err := a.RPC("Catalog.Register", args, &out); err != nil { @@ -83,7 +83,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv2, "") + a.State.AddServiceWithChecks(srv2, nil, "") srv2_mod := new(structs.NodeService) *srv2_mod = *srv2 @@ -105,7 +105,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv3, "") + a.State.AddServiceWithChecks(srv3, nil, "") // Exists remote (delete) srv4 := &structs.NodeService{ @@ -137,7 +137,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv5, "") + a.State.AddServiceWithChecks(srv5, nil, "") srv5_mod := new(structs.NodeService) *srv5_mod = *srv5 @@ -290,7 +290,7 @@ func TestAgentAntiEntropy_Services_ConnectProxy(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") require.NoError(t, a.RPC("Catalog.Register", &structs.RegisterRequest{ Datacenter: "dc1", Node: a.Config.NodeName, @@ -311,7 +311,7 @@ func TestAgentAntiEntropy_Services_ConnectProxy(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv2, "") + a.State.AddServiceWithChecks(srv2, nil, "") srv2_mod := clone(srv2) 
srv2_mod.Port = 9000 @@ -335,7 +335,7 @@ func TestAgentAntiEntropy_Services_ConnectProxy(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv3, "") + a.State.AddServiceWithChecks(srv3, nil, "") // Exists remote (delete) srv4 := &structs.NodeService{ @@ -496,7 +496,7 @@ func TestAgent_ServiceWatchCh(t *testing.T) { Tags: []string{"tag1"}, Port: 6100, } - require.NoError(t, a.State.AddService(srv1, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv1, nil, "")) verifyState := func(ss *local.ServiceState) { require.NotNil(t, ss) @@ -518,7 +518,7 @@ func TestAgent_ServiceWatchCh(t *testing.T) { go func() { srv2 := srv1 srv2.Port = 6200 - require.NoError(t, a.State.AddService(srv2, "")) + require.NoError(t, a.State.AddServiceWithChecks(srv2, nil, "")) }() // We should observe WatchCh close @@ -595,7 +595,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv1, "") + a.State.AddServiceWithChecks(srv1, nil, "") // register a local service with tag override disabled srv2 := &structs.NodeService{ @@ -610,7 +610,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv2, "") + a.State.AddServiceWithChecks(srv2, nil, "") // make sure they are both in the catalog if err := a.State.SyncChanges(); err != nil { @@ -722,7 +722,7 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { Tags: []string{"primary"}, Port: 5000, } - a.State.AddService(srv, "") + a.State.AddServiceWithChecks(srv, nil, "") chk := &structs.HealthCheck{ Node: a.Config.NodeName, @@ -772,7 +772,7 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { Tags: []string{"primary"}, Port: 5000, } - a.State.AddService(srv, "") + a.State.AddServiceWithChecks(srv, nil, "") chk1 := &structs.HealthCheck{ Node: a.Config.NodeName, @@ -873,7 +873,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv1, token) + a.State.AddServiceWithChecks(srv1, nil, token) // Create service (allowed) srv2 := &structs.NodeService{ @@ -887,7 +887,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv2, token) + a.State.AddServiceWithChecks(srv2, nil, token) if err := a.State.SyncFull(); err != nil { t.Fatalf("err: %v", err) @@ -1332,7 +1332,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv1, "root") + a.State.AddServiceWithChecks(srv1, nil, "root") srv2 := &structs.NodeService{ ID: "api", Service: "api", @@ -1344,7 +1344,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { }, EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(), } - a.State.AddService(srv2, "root") + a.State.AddServiceWithChecks(srv2, nil, "root") if err := a.State.SyncFull(); err != nil { t.Fatalf("err: %v", err) @@ -1861,14 +1861,14 @@ func TestState_ServiceTokens(t *testing.T) { }) t.Run("empty string when there is no token", func(t *testing.T) { - err := l.AddService(&structs.NodeService{ID: "redis"}, "") + err := l.AddServiceWithChecks(&structs.NodeService{ID: "redis"}, nil, "") require.NoError(t, err) require.Equal(t, "", l.ServiceToken(id)) }) t.Run("returns 
configured token", func(t *testing.T) { - err := l.AddService(&structs.NodeService{ID: "redis"}, "abc123") + err := l.AddServiceWithChecks(&structs.NodeService{ID: "redis"}, nil, "abc123") require.NoError(t, err) require.Equal(t, "abc123", l.ServiceToken(id)) @@ -1931,7 +1931,7 @@ func TestAgent_CheckCriticalTime(t *testing.T) { l.TriggerSyncChanges = func() {} svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000} - l.AddService(svc, "") + l.AddServiceWithChecks(svc, nil, "") // Add a passing check and make sure it's not critical. checkID := types.CheckID("redis:1") @@ -2017,8 +2017,8 @@ func TestAgent_AliasCheck(t *testing.T) { l.TriggerSyncChanges = func() {} // Add checks - require.NoError(t, l.AddService(&structs.NodeService{Service: "s1"}, "")) - require.NoError(t, l.AddService(&structs.NodeService{Service: "s2"}, "")) + require.NoError(t, l.AddServiceWithChecks(&structs.NodeService{Service: "s1"}, nil, "")) + require.NoError(t, l.AddServiceWithChecks(&structs.NodeService{Service: "s2"}, nil, "")) require.NoError(t, l.AddCheck(&structs.HealthCheck{CheckID: types.CheckID("c1"), ServiceID: "s1"}, "")) require.NoError(t, l.AddCheck(&structs.HealthCheck{CheckID: types.CheckID("c2"), ServiceID: "s2"}, "")) @@ -2071,7 +2071,7 @@ func TestAgent_AliasCheck_ServiceNotification(t *testing.T) { require.NoError(t, l.AddAliasCheck(structs.NewCheckID(types.CheckID("a1"), nil), structs.NewServiceID("s1", nil), notifyCh)) // Add aliased service, s1, and verify we get notified - require.NoError(t, l.AddService(&structs.NodeService{Service: "s1"}, "")) + require.NoError(t, l.AddServiceWithChecks(&structs.NodeService{Service: "s1"}, nil, "")) select { case <-notifyCh: default: @@ -2079,7 +2079,7 @@ func TestAgent_AliasCheck_ServiceNotification(t *testing.T) { } // Re-adding same service should not lead to a notification - require.NoError(t, l.AddService(&structs.NodeService{Service: "s1"}, "")) + require.NoError(t, l.AddServiceWithChecks(&structs.NodeService{Service: "s1"}, nil, "")) select { case <-notifyCh: t.Fatal("notify received") @@ -2087,7 +2087,7 @@ func TestAgent_AliasCheck_ServiceNotification(t *testing.T) { } // Add different service and verify we do not get notified - require.NoError(t, l.AddService(&structs.NodeService{Service: "s2"}, "")) + require.NoError(t, l.AddServiceWithChecks(&structs.NodeService{Service: "s2"}, nil, "")) select { case <-notifyCh: t.Fatal("notify received") @@ -2189,10 +2189,10 @@ func TestState_RemoveServiceErrorMessages(t *testing.T) { state.TriggerSyncChanges = func() {} // Add 1 service - err := state.AddService(&structs.NodeService{ + err := state.AddServiceWithChecks(&structs.NodeService{ ID: "web-id", Service: "web-name", - }, "") + }, nil, "") require.NoError(t, err) // Attempt to remove service that doesn't exist @@ -2230,9 +2230,9 @@ func TestState_Notify(t *testing.T) { drainCh(notifyCh) // Add a service - err := state.AddService(&structs.NodeService{ + err := state.AddServiceWithChecks(&structs.NodeService{ Service: "web", - }, "fake-token-web") + }, nil, "fake-token-web") require.NoError(t, err) // Should have a notification @@ -2240,10 +2240,10 @@ func TestState_Notify(t *testing.T) { drainCh(notifyCh) // Re-Add same service - err = state.AddService(&structs.NodeService{ + err = state.AddServiceWithChecks(&structs.NodeService{ Service: "web", Port: 4444, - }, "fake-token-web") + }, nil, "fake-token-web") require.NoError(t, err) // Should have a notification @@ -2261,9 +2261,9 @@ func TestState_Notify(t *testing.T) { 
state.StopNotify(notifyCh) // Add a service - err = state.AddService(&structs.NodeService{ + err = state.AddServiceWithChecks(&structs.NodeService{ Service: "web", - }, "fake-token-web") + }, nil, "fake-token-web") require.NoError(t, err) // Should NOT have a notification @@ -2293,7 +2293,7 @@ func TestAliasNotifications_local(t *testing.T) { Address: "127.0.0.10", Port: 8080, } - a.State.AddService(srv, "") + a.State.AddServiceWithChecks(srv, nil, "") scID := "socat-sidecar-proxy" sc := &structs.NodeService{ @@ -2303,7 +2303,7 @@ func TestAliasNotifications_local(t *testing.T) { Address: "127.0.0.10", Port: 9090, } - a.State.AddService(sc, "") + a.State.AddServiceWithChecks(sc, nil, "") tcpID := types.CheckID("service:socat-tcp") chk0 := &structs.HealthCheck{ diff --git a/agent/peering_endpoint.go b/agent/peering_endpoint.go index 22f4fc1ae..6ef7167b2 100644 --- a/agent/peering_endpoint.go +++ b/agent/peering_endpoint.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/hashicorp/consul/acl" + external "github.com/hashicorp/consul/agent/grpc-external" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/proto/pbpeering" @@ -32,17 +33,20 @@ func (s *HTTPHandlers) PeeringEndpoint(resp http.ResponseWriter, req *http.Reque // peeringRead fetches a peering that matches the name and partition. // This assumes that the name and partition parameters are valid func (s *HTTPHandlers) peeringRead(resp http.ResponseWriter, req *http.Request, name string) (interface{}, error) { - args := pbpeering.PeeringReadRequest{ - Name: name, - Datacenter: s.agent.config.Datacenter, - } var entMeta acl.EnterpriseMeta if err := s.parseEntMetaPartition(req, &entMeta); err != nil { return nil, err } - args.Partition = entMeta.PartitionOrEmpty() + args := pbpeering.PeeringReadRequest{ + Name: name, + Partition: entMeta.PartitionOrEmpty(), + } - result, err := s.agent.rpcClientPeering.PeeringRead(req.Context(), &args) + var token string + s.parseToken(req, &token) + ctx := external.ContextWithToken(req.Context(), token) + + result, err := s.agent.rpcClientPeering.PeeringRead(ctx, &args) if err != nil { return nil, err } @@ -55,16 +59,19 @@ func (s *HTTPHandlers) peeringRead(resp http.ResponseWriter, req *http.Request, // PeeringList fetches all peerings in the datacenter in OSS or in a given partition in Consul Enterprise. func (s *HTTPHandlers) PeeringList(resp http.ResponseWriter, req *http.Request) (interface{}, error) { - args := pbpeering.PeeringListRequest{ - Datacenter: s.agent.config.Datacenter, - } var entMeta acl.EnterpriseMeta if err := s.parseEntMetaPartition(req, &entMeta); err != nil { return nil, err } - args.Partition = entMeta.PartitionOrEmpty() + args := pbpeering.PeeringListRequest{ + Partition: entMeta.PartitionOrEmpty(), + } - pbresp, err := s.agent.rpcClientPeering.PeeringList(req.Context(), &args) + var token string + s.parseToken(req, &token) + ctx := external.ContextWithToken(req.Context(), token) + + pbresp, err := s.agent.rpcClientPeering.PeeringList(ctx, &args) if err != nil { return nil, err } @@ -79,14 +86,12 @@ func (s *HTTPHandlers) PeeringGenerateToken(resp http.ResponseWriter, req *http. 
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "The peering arguments must be provided in the body"} } - apiRequest := &api.PeeringGenerateTokenRequest{ - Datacenter: s.agent.config.Datacenter, - } - if err := lib.DecodeJSON(req.Body, apiRequest); err != nil { + var apiRequest api.PeeringGenerateTokenRequest + if err := lib.DecodeJSON(req.Body, &apiRequest); err != nil { return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Body decoding failed: %v", err)} } - args := pbpeering.NewGenerateTokenRequestFromAPI(apiRequest) + args := pbpeering.NewGenerateTokenRequestFromAPI(&apiRequest) if args.PeerName == "" { return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "PeerName is required in the payload when generating a new peering token."} } @@ -99,7 +104,11 @@ func (s *HTTPHandlers) PeeringGenerateToken(resp http.ResponseWriter, req *http. args.Partition = entMeta.PartitionOrEmpty() } - out, err := s.agent.rpcClientPeering.GenerateToken(req.Context(), args) + var token string + s.parseToken(req, &token) + ctx := external.ContextWithToken(req.Context(), token) + + out, err := s.agent.rpcClientPeering.GenerateToken(ctx, args) if err != nil { return nil, err } @@ -114,23 +123,32 @@ func (s *HTTPHandlers) PeeringEstablish(resp http.ResponseWriter, req *http.Requ return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "The peering arguments must be provided in the body"} } - apiRequest := &api.PeeringEstablishRequest{ - Datacenter: s.agent.config.Datacenter, - } - if err := lib.DecodeJSON(req.Body, apiRequest); err != nil { + var apiRequest api.PeeringEstablishRequest + if err := lib.DecodeJSON(req.Body, &apiRequest); err != nil { return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Body decoding failed: %v", err)} } - args := pbpeering.NewEstablishRequestFromAPI(apiRequest) + args := pbpeering.NewEstablishRequestFromAPI(&apiRequest) if args.PeerName == "" { return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "PeerName is required in the payload when establishing a peering."} } - if args.PeeringToken == "" { return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "PeeringToken is required in the payload when establishing a peering."} } - out, err := s.agent.rpcClientPeering.Establish(req.Context(), args) + var entMeta acl.EnterpriseMeta + if err := s.parseEntMetaPartition(req, &entMeta); err != nil { + return nil, err + } + if args.Partition == "" { + args.Partition = entMeta.PartitionOrEmpty() + } + + var token string + s.parseToken(req, &token) + ctx := external.ContextWithToken(req.Context(), token) + + out, err := s.agent.rpcClientPeering.Establish(ctx, args) if err != nil { return nil, err } @@ -141,17 +159,20 @@ func (s *HTTPHandlers) PeeringEstablish(resp http.ResponseWriter, req *http.Requ // peeringDelete initiates a deletion for a peering that matches the name and partition. // This assumes that the name and partition parameters are valid. 
func (s *HTTPHandlers) peeringDelete(resp http.ResponseWriter, req *http.Request, name string) (interface{}, error) { - args := pbpeering.PeeringDeleteRequest{ - Name: name, - Datacenter: s.agent.config.Datacenter, - } var entMeta acl.EnterpriseMeta if err := s.parseEntMetaPartition(req, &entMeta); err != nil { return nil, err } - args.Partition = entMeta.PartitionOrEmpty() + args := pbpeering.PeeringDeleteRequest{ + Name: name, + Partition: entMeta.PartitionOrEmpty(), + } - _, err := s.agent.rpcClientPeering.PeeringDelete(req.Context(), &args) + var token string + s.parseToken(req, &token) + ctx := external.ContextWithToken(req.Context(), token) + + _, err := s.agent.rpcClientPeering.PeeringDelete(ctx, &args) if err != nil { return nil, err } diff --git a/agent/peering_endpoint_test.go b/agent/peering_endpoint_test.go index 545e4f5ec..05b8646e9 100644 --- a/agent/peering_endpoint_test.go +++ b/agent/peering_endpoint_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/structs" @@ -113,6 +114,104 @@ func TestHTTP_Peering_GenerateToken(t *testing.T) { // The PeerID in the token is randomly generated so we don't assert on its value. require.NotEmpty(t, token.PeerID) }) + + t.Run("Success with external address", func(t *testing.T) { + externalAddress := "32.1.2.3" + body := &pbpeering.GenerateTokenRequest{ + PeerName: "peering-a", + ServerExternalAddresses: []string{externalAddress}, + } + + bodyBytes, err := json.Marshal(body) + require.NoError(t, err) + + req, err := http.NewRequest("POST", "/v1/peering/token", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + require.Equal(t, http.StatusOK, resp.Code, "expected 200, got %d: %v", resp.Code, resp.Body.String()) + + var r pbpeering.GenerateTokenResponse + require.NoError(t, json.NewDecoder(resp.Body).Decode(&r)) + + tokenJSON, err := base64.StdEncoding.DecodeString(r.PeeringToken) + require.NoError(t, err) + + var token structs.PeeringToken + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + require.Nil(t, token.CA) + require.Equal(t, []string{externalAddress}, token.ServerAddresses) + require.Equal(t, "server.dc1.consul", token.ServerName) + + // The PeerID in the token is randomly generated so we don't assert on its value. 
+ require.NotEmpty(t, token.PeerID) + }) +} + +// Test for GenerateToken calls at various points in a peer's lifecycle +func TestHTTP_Peering_GenerateToken_EdgeCases(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Parallel() + + a := NewTestAgent(t, "") + testrpc.WaitForTestAgent(t, a.RPC, "dc1") + + body := &pbpeering.GenerateTokenRequest{ + PeerName: "peering-a", + } + + bodyBytes, err := json.Marshal(body) + require.NoError(t, err) + + getPeering := func(t *testing.T) *api.Peering { + t.Helper() + // Check state of peering + req, err := http.NewRequest("GET", "/v1/peering/peering-a", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + require.Equal(t, http.StatusOK, resp.Code, "expected 200, got %d: %v", resp.Code, resp.Body.String()) + + var p *api.Peering + require.NoError(t, json.NewDecoder(resp.Body).Decode(&p)) + return p + } + + { + // Call once + req, err := http.NewRequest("POST", "/v1/peering/token", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + require.Equal(t, http.StatusOK, resp.Code, "expected 200, got %d: %v", resp.Code, resp.Body.String()) + // Assertions tested in TestHTTP_Peering_GenerateToken + } + + if !t.Run("generate token called again", func(t *testing.T) { + before := getPeering(t) + require.Equal(t, api.PeeringStatePending, before.State) + + // Call again + req, err := http.NewRequest("POST", "/v1/peering/token", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + require.Equal(t, http.StatusOK, resp.Code, "expected 200, got %d: %v", resp.Code, resp.Body.String()) + + after := getPeering(t) + assert.NotEqual(t, before.ModifyIndex, after.ModifyIndex) + // blank out modify index so we can compare rest of struct + before.ModifyIndex, after.ModifyIndex = 0, 0 + assert.Equal(t, before, after) + + }) { + t.FailNow() + } + } func TestHTTP_Peering_Establish(t *testing.T) { diff --git a/agent/prepared_query_endpoint_test.go b/agent/prepared_query_endpoint_test.go index 34b8975fd..9cf805b88 100644 --- a/agent/prepared_query_endpoint_test.go +++ b/agent/prepared_query_endpoint_test.go @@ -92,7 +92,7 @@ func TestPreparedQuery_Create(t *testing.T) { Session: "my-session", Service: structs.ServiceQuery{ Service: "my-service", - Failover: structs.QueryDatacenterOptions{ + Failover: structs.QueryFailoverOptions{ NearestN: 4, Datacenters: []string{"dc1", "dc2"}, }, @@ -883,7 +883,7 @@ func TestPreparedQuery_Update(t *testing.T) { Session: "my-session", Service: structs.ServiceQuery{ Service: "my-service", - Failover: structs.QueryDatacenterOptions{ + Failover: structs.QueryFailoverOptions{ NearestN: 4, Datacenters: []string{"dc1", "dc2"}, }, diff --git a/agent/proxycfg-glue/config_entry.go b/agent/proxycfg-glue/config_entry.go index 8f85d5e13..1f6fbf245 100644 --- a/agent/proxycfg-glue/config_entry.go +++ b/agent/proxycfg-glue/config_entry.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/consul/agent/proxycfg" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/submatview" + "github.com/hashicorp/consul/proto/pbcommon" "github.com/hashicorp/consul/proto/pbconfigentry" "github.com/hashicorp/consul/proto/pbsubscribe" ) @@ -19,6 +20,7 @@ import ( // ServerDataSourceDeps contains the dependencies needed for sourcing data from // server-local sources (e.g. materialized views). 
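Note: the ServerDataSourceDeps struct defined just below now also carries the local Datacenter, which the server-backed sources use to decide whether to answer a request locally or delegate it to their remote fallback. A hedged construction sketch using only fields visible in this change; ACLResolver, which the ACL-enforcing sources also require, is omitted because its concrete type is not shown here.

```go
package proxycfgglue

import (
	"github.com/hashicorp/go-hclog"

	"github.com/hashicorp/consul/agent/consul/stream"
	"github.com/hashicorp/consul/agent/submatview"
)

// newServerDataSourceDeps is a hypothetical helper showing how the dependency
// bundle might be populated by the agent wiring code.
func newServerDataSourceDeps(
	datacenter string,
	viewStore *submatview.Store,
	publisher *stream.EventPublisher,
	logger hclog.Logger,
	getStore func() Store,
) ServerDataSourceDeps {
	return ServerDataSourceDeps{
		Datacenter:     datacenter,
		ViewStore:      viewStore,
		EventPublisher: publisher,
		Logger:         logger,
		GetStore:       getStore,
		// ACLResolver would also be set here for sources that enforce ACLs
		// (see the tests' newStaticResolver helper).
	}
}
```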
type ServerDataSourceDeps struct { + Datacenter string ViewStore *submatview.Store EventPublisher *stream.EventPublisher Logger hclog.Logger @@ -193,7 +195,7 @@ func (v *configEntryListView) Result(index uint64) any { } func (v *configEntryListView) Update(events []*pbsubscribe.Event) error { - for _, event := range v.filterByEnterpriseMeta(events) { + for _, event := range filterByEnterpriseMeta(events, v.entMeta) { update := event.GetConfigEntry() configEntry := pbconfigentry.ConfigEntryToStructs(update.ConfigEntry) name := structs.NewServiceName(configEntry.GetName(), configEntry.GetEnterpriseMeta()).String() @@ -212,22 +214,26 @@ func (v *configEntryListView) Update(events []*pbsubscribe.Event) error { // don't match the request's enterprise meta - this is necessary because when // subscribing to a topic with SubjectWildcard we'll get events for resources // in all partitions and namespaces. -func (v *configEntryListView) filterByEnterpriseMeta(events []*pbsubscribe.Event) []*pbsubscribe.Event { - partition := v.entMeta.PartitionOrDefault() - namespace := v.entMeta.NamespaceOrDefault() +func filterByEnterpriseMeta(events []*pbsubscribe.Event, entMeta acl.EnterpriseMeta) []*pbsubscribe.Event { + partition := entMeta.PartitionOrDefault() + namespace := entMeta.NamespaceOrDefault() filtered := make([]*pbsubscribe.Event, 0, len(events)) for _, event := range events { - configEntry := event.GetConfigEntry().GetConfigEntry() - if configEntry == nil { + var eventEntMeta *pbcommon.EnterpriseMeta + switch payload := event.Payload.(type) { + case *pbsubscribe.Event_ConfigEntry: + eventEntMeta = payload.ConfigEntry.ConfigEntry.GetEnterpriseMeta() + case *pbsubscribe.Event_Service: + eventEntMeta = payload.Service.GetEnterpriseMeta() + default: continue } - entMeta := configEntry.GetEnterpriseMeta() - if partition != acl.WildcardName && !acl.EqualPartitions(partition, entMeta.GetPartition()) { + if partition != acl.WildcardName && !acl.EqualPartitions(partition, eventEntMeta.GetPartition()) { continue } - if namespace != acl.WildcardName && !acl.EqualNamespaces(namespace, entMeta.GetNamespace()) { + if namespace != acl.WildcardName && !acl.EqualNamespaces(namespace, eventEntMeta.GetNamespace()) { continue } diff --git a/agent/proxycfg-glue/discovery_chain.go b/agent/proxycfg-glue/discovery_chain.go new file mode 100644 index 000000000..78e1f1653 --- /dev/null +++ b/agent/proxycfg-glue/discovery_chain.go @@ -0,0 +1,95 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/discoverychain" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" +) + +// CacheCompiledDiscoveryChain satisfies the proxycfg.CompiledDiscoveryChain +// interface by sourcing data from the agent cache. +func CacheCompiledDiscoveryChain(c *cache.Cache) proxycfg.CompiledDiscoveryChain { + return &cacheProxyDataSource[*structs.DiscoveryChainRequest]{c, cachetype.CompiledDiscoveryChainName} +} + +// ServerCompiledDiscoveryChain satisfies the proxycfg.CompiledDiscoveryChain +// interface by sourcing data from a blocking query against the server's state +// store. +// +// Requests for services in remote datacenters will be delegated to the given +// remoteSource (i.e. CacheCompiledDiscoveryChain). 
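Note: the doc comment above leaves the remote fallback up to the caller. A hedged wiring sketch using only the two constructors involved here; the helper name is invented and this is not code from this change.

```go
package proxycfgglue

import (
	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/proxycfg"
)

// newCompiledDiscoveryChainSource is a hypothetical helper: requests for the
// local datacenter are compiled from the server's state store, while requests
// for other datacenters fall back to the agent cache.
func newCompiledDiscoveryChainSource(deps ServerDataSourceDeps, c *cache.Cache) proxycfg.CompiledDiscoveryChain {
	return ServerCompiledDiscoveryChain(deps, CacheCompiledDiscoveryChain(c))
}
```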
+func ServerCompiledDiscoveryChain(deps ServerDataSourceDeps, remoteSource proxycfg.CompiledDiscoveryChain) proxycfg.CompiledDiscoveryChain { + return &serverCompiledDiscoveryChain{deps, remoteSource} +} + +type serverCompiledDiscoveryChain struct { + deps ServerDataSourceDeps + remoteSource proxycfg.CompiledDiscoveryChain +} + +func (s serverCompiledDiscoveryChain) Notify(ctx context.Context, req *structs.DiscoveryChainRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + if req.Datacenter != s.deps.Datacenter { + return s.remoteSource.Notify(ctx, req, correlationID, ch) + } + + entMeta := req.GetEnterpriseMeta() + + evalDC := req.EvaluateInDatacenter + if evalDC == "" { + evalDC = s.deps.Datacenter + } + + compileReq := discoverychain.CompileRequest{ + ServiceName: req.Name, + EvaluateInNamespace: entMeta.NamespaceOrDefault(), + EvaluateInPartition: entMeta.PartitionOrDefault(), + EvaluateInDatacenter: evalDC, + OverrideMeshGateway: req.OverrideMeshGateway, + OverrideProtocol: req.OverrideProtocol, + OverrideConnectTimeout: req.OverrideConnectTimeout, + } + + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.DiscoveryChainResponse, error) { + var authzContext acl.AuthorizerContext + authz, err := s.deps.ACLResolver.ResolveTokenAndDefaultMeta(req.Token, req.GetEnterpriseMeta(), &authzContext) + if err != nil { + return 0, nil, err + } + if err := authz.ToAllowAuthorizer().ServiceReadAllowed(req.Name, &authzContext); err != nil { + // TODO(agentless): the agent cache handles acl.IsErrNotFound specially to + // prevent endlessly retrying if an ACL token is deleted. We should probably + // do this in watch.ServerLocalNotify too. + return 0, nil, err + } + + index, chain, entries, err := store.ServiceDiscoveryChain(ws, req.Name, entMeta, compileReq) + if err != nil { + return 0, nil, err + } + + rsp := &structs.DiscoveryChainResponse{ + Chain: chain, + QueryMeta: structs.QueryMeta{ + Backend: structs.QueryBackendBlocking, + Index: index, + }, + } + + // TODO(boxofrad): Check with @mkeeler that this is the correct thing to do. 
+ if entries.IsEmpty() { + return index, rsp, watch.ErrorNotFound + } + return index, rsp, nil + }, + dispatchBlockingQueryUpdate[*structs.DiscoveryChainResponse](ch), + ) +} diff --git a/agent/proxycfg-glue/discovery_chain_test.go b/agent/proxycfg-glue/discovery_chain_test.go new file mode 100644 index 000000000..7207ffaf1 --- /dev/null +++ b/agent/proxycfg-glue/discovery_chain_test.go @@ -0,0 +1,114 @@ +package proxycfgglue + +import ( + "context" + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" +) + +func TestServerCompiledDiscoveryChain(t *testing.T) { + t.Run("remote queries are delegated to the remote source", func(t *testing.T) { + var ( + ctx = context.Background() + req = &structs.DiscoveryChainRequest{Datacenter: "dc2"} + correlationID = "correlation-id" + ch = make(chan<- proxycfg.UpdateEvent) + result = errors.New("KABOOM") + ) + + remoteSource := newMockCompiledDiscoveryChain(t) + remoteSource.On("Notify", ctx, req, correlationID, ch).Return(result) + + dataSource := ServerCompiledDiscoveryChain(ServerDataSourceDeps{Datacenter: "dc1"}, remoteSource) + err := dataSource.Notify(ctx, req, correlationID, ch) + require.Equal(t, result, err) + }) + + t.Run("local queries are served from the state store", func(t *testing.T) { + const ( + serviceName = "web" + datacenter = "dc1" + index = 123 + ) + + store := state.NewStateStore(nil) + require.NoError(t, store.CASetConfig(index, &structs.CAConfiguration{ClusterID: "cluster-id"})) + require.NoError(t, store.EnsureConfigEntry(index, &structs.ServiceConfigEntry{ + Name: serviceName, + Kind: structs.ServiceDefaults, + })) + + req := &structs.DiscoveryChainRequest{ + Name: serviceName, + Datacenter: datacenter, + } + + resolver := newStaticResolver( + policyAuthorizer(t, fmt.Sprintf(`service "%s" { policy = "read" }`, serviceName)), + ) + + dataSource := ServerCompiledDiscoveryChain(ServerDataSourceDeps{ + ACLResolver: resolver, + Datacenter: datacenter, + GetStore: func() Store { return store }, + }, nil) + + eventCh := make(chan proxycfg.UpdateEvent) + err := dataSource.Notify(context.Background(), req, "", eventCh) + require.NoError(t, err) + + // Check we get an event with the initial state. + result := getEventResult[*structs.DiscoveryChainResponse](t, eventCh) + require.NotNil(t, result.Chain) + + // Change the protocol to HTTP and check we get a recompiled chain. + require.NoError(t, store.EnsureConfigEntry(index+1, &structs.ServiceConfigEntry{ + Name: serviceName, + Kind: structs.ServiceDefaults, + Protocol: "http", + })) + + result = getEventResult[*structs.DiscoveryChainResponse](t, eventCh) + require.NotNil(t, result.Chain) + require.Equal(t, "http", result.Chain.Protocol) + + // Revoke access to the service. + resolver.SwapAuthorizer(acl.DenyAll()) + + // Write another config entry. + require.NoError(t, store.EnsureConfigEntry(index+2, &structs.ServiceConfigEntry{ + Name: serviceName, + Kind: structs.ServiceDefaults, + MaxInboundConnections: 1, + })) + + // Should no longer receive events for this service. 
+ expectNoEvent(t, eventCh) + }) +} + +func newMockCompiledDiscoveryChain(t *testing.T) *mockCompiledDiscoveryChain { + mock := &mockCompiledDiscoveryChain{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +type mockCompiledDiscoveryChain struct { + mock.Mock +} + +func (m *mockCompiledDiscoveryChain) Notify(ctx context.Context, req *structs.DiscoveryChainRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return m.Called(ctx, req, correlationID, ch).Error(0) +} diff --git a/agent/proxycfg-glue/exported_peered_services.go b/agent/proxycfg-glue/exported_peered_services.go new file mode 100644 index 000000000..3ce8db632 --- /dev/null +++ b/agent/proxycfg-glue/exported_peered_services.go @@ -0,0 +1,60 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/structs/aclfilter" +) + +// CacheExportedPeeredServices satisfies the proxycfg.ExportedPeeredServices +// interface by sourcing data from the agent cache. +func CacheExportedPeeredServices(c *cache.Cache) proxycfg.ExportedPeeredServices { + return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.ExportedPeeredServicesName} +} + +// ServerExportedPeeredServices satisifies the proxycfg.ExportedPeeredServices +// interface by sourcing data from a blocking query against the server's state +// store. +func ServerExportedPeeredServices(deps ServerDataSourceDeps) proxycfg.ExportedPeeredServices { + return &serverExportedPeeredServices{deps} +} + +type serverExportedPeeredServices struct { + deps ServerDataSourceDeps +} + +func (s *serverExportedPeeredServices) Notify(ctx context.Context, req *structs.DCSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.IndexedExportedServiceList, error) { + // TODO(peering): acls: mesh gateway needs appropriate wildcard service:read + authz, err := s.deps.ACLResolver.ResolveTokenAndDefaultMeta(req.Token, &req.EnterpriseMeta, nil) + if err != nil { + return 0, nil, err + } + + index, serviceMap, err := store.ExportedServicesForAllPeersByName(ws, req.EnterpriseMeta) + if err != nil { + return 0, nil, err + } + + result := &structs.IndexedExportedServiceList{ + Services: serviceMap, + QueryMeta: structs.QueryMeta{ + Backend: structs.QueryBackendBlocking, + Index: index, + }, + } + aclfilter.New(authz, s.deps.Logger).Filter(result) + + return index, result, nil + }, + dispatchBlockingQueryUpdate[*structs.IndexedExportedServiceList](ch), + ) +} diff --git a/agent/proxycfg-glue/exported_peered_services_test.go b/agent/proxycfg-glue/exported_peered_services_test.go new file mode 100644 index 000000000..552519bb1 --- /dev/null +++ b/agent/proxycfg-glue/exported_peered_services_test.go @@ -0,0 +1,113 @@ +package proxycfgglue + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/proto/pbpeering" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerExportedPeeredServices(t 
*testing.T) { + nextIndex := indexGenerator() + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + store := state.NewStateStore(nil) + + for _, peer := range []string{"peer-1", "peer-2", "peer-3"} { + require.NoError(t, store.PeeringWrite(nextIndex(), &pbpeering.Peering{ + ID: testUUID(t), + Name: peer, + State: pbpeering.PeeringState_ACTIVE, + })) + } + + require.NoError(t, store.EnsureConfigEntry(nextIndex(), &structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "web", + Consumers: []structs.ServiceConsumer{ + {PeerName: "peer-1"}, + }, + }, + { + Name: "db", + Consumers: []structs.ServiceConsumer{ + {PeerName: "peer-2"}, + }, + }, + }, + })) + + authz := policyAuthorizer(t, ` + service "web" { policy = "read" } + service "api" { policy = "read" } + service "db" { policy = "deny" } + `) + + eventCh := make(chan proxycfg.UpdateEvent) + dataSource := ServerExportedPeeredServices(ServerDataSourceDeps{ + GetStore: func() Store { return store }, + ACLResolver: newStaticResolver(authz), + }) + require.NoError(t, dataSource.Notify(ctx, &structs.DCSpecificRequest{}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedExportedServiceList](t, eventCh) + require.Equal(t, + map[string]structs.ServiceList{ + "peer-1": {structs.NewServiceName("web", nil)}, + }, + result.Services, + ) + }) + + testutil.RunStep(t, "update exported services", func(t *testing.T) { + require.NoError(t, store.EnsureConfigEntry(nextIndex(), &structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "web", + Consumers: []structs.ServiceConsumer{ + {PeerName: "peer-1"}, + }, + }, + { + Name: "db", + Consumers: []structs.ServiceConsumer{ + {PeerName: "peer-2"}, + }, + }, + { + Name: "api", + Consumers: []structs.ServiceConsumer{ + {PeerName: "peer-1"}, + {PeerName: "peer-3"}, + }, + }, + }, + })) + + result := getEventResult[*structs.IndexedExportedServiceList](t, eventCh) + require.Equal(t, + map[string]structs.ServiceList{ + "peer-1": { + structs.NewServiceName("api", nil), + structs.NewServiceName("web", nil), + }, + "peer-3": { + structs.NewServiceName("api", nil), + }, + }, + result.Services, + ) + }) +} diff --git a/agent/proxycfg-glue/federation_state_list_mesh_gateways.go b/agent/proxycfg-glue/federation_state_list_mesh_gateways.go new file mode 100644 index 000000000..ea3640ad9 --- /dev/null +++ b/agent/proxycfg-glue/federation_state_list_mesh_gateways.go @@ -0,0 +1,67 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/structs/aclfilter" +) + +// CacheFederationStateListMeshGateways satisfies the proxycfg.FederationStateListMeshGateways +// interface by sourcing data from the agent cache. +func CacheFederationStateListMeshGateways(c *cache.Cache) proxycfg.FederationStateListMeshGateways { + return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.FederationStateListMeshGatewaysName} +} + +// ServerFederationStateListMeshGateways satisfies the proxycfg.FederationStateListMeshGateways +// interface by sourcing data from a blocking query against the server's state +// store. 
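Note: like the other server-backed sources in this change, the source below delivers results to its caller as proxycfg.UpdateEvent values on a channel. A sketch of the consuming side, modeled loosely on how the accompanying tests drive these sources; the function name and correlation ID are illustrative.

```go
package proxycfgglue

import (
	"context"
	"fmt"

	"github.com/hashicorp/consul/agent/proxycfg"
	"github.com/hashicorp/consul/agent/structs"
)

// watchMeshGateways is a hypothetical consumer: it subscribes once, then
// receives a fresh result every time the underlying blocking query observes
// a change in the federation states.
func watchMeshGateways(ctx context.Context, src proxycfg.FederationStateListMeshGateways) error {
	eventCh := make(chan proxycfg.UpdateEvent)
	if err := src.Notify(ctx, &structs.DCSpecificRequest{Datacenter: "dc1"}, "mesh-gateways", eventCh); err != nil {
		return err
	}
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case event := <-eventCh:
			if event.Err != nil {
				return event.Err
			}
			result, ok := event.Result.(*structs.DatacenterIndexedCheckServiceNodes)
			if !ok {
				return fmt.Errorf("unexpected result type: %T", event.Result)
			}
			fmt.Println("datacenters with mesh gateways:", len(result.DatacenterNodes))
		}
	}
}
```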
+func ServerFederationStateListMeshGateways(deps ServerDataSourceDeps) proxycfg.FederationStateListMeshGateways { + return &serverFederationStateListMeshGateways{deps} +} + +type serverFederationStateListMeshGateways struct { + deps ServerDataSourceDeps +} + +func (s *serverFederationStateListMeshGateways) Notify(ctx context.Context, req *structs.DCSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.DatacenterIndexedCheckServiceNodes, error) { + authz, err := s.deps.ACLResolver.ResolveTokenAndDefaultMeta(req.Token, &req.EnterpriseMeta, nil) + if err != nil { + return 0, nil, err + } + + index, fedStates, err := store.FederationStateList(ws) + if err != nil { + return 0, nil, err + } + + results := make(map[string]structs.CheckServiceNodes) + for _, fs := range fedStates { + if gws := fs.MeshGateways; len(gws) != 0 { + // Shallow clone to prevent ACL filtering manipulating the slice in memdb. + results[fs.Datacenter] = gws.ShallowClone() + } + } + + rsp := &structs.DatacenterIndexedCheckServiceNodes{ + DatacenterNodes: results, + QueryMeta: structs.QueryMeta{ + Index: index, + Backend: structs.QueryBackendBlocking, + }, + } + aclfilter.New(authz, s.deps.Logger).Filter(rsp) + + return index, rsp, nil + }, + dispatchBlockingQueryUpdate[*structs.DatacenterIndexedCheckServiceNodes](ch), + ) +} diff --git a/agent/proxycfg-glue/federation_state_list_mesh_gateways_test.go b/agent/proxycfg-glue/federation_state_list_mesh_gateways_test.go new file mode 100644 index 000000000..5c716d24c --- /dev/null +++ b/agent/proxycfg-glue/federation_state_list_mesh_gateways_test.go @@ -0,0 +1,103 @@ +package proxycfgglue + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerFederationStateListMeshGateways(t *testing.T) { + const index uint64 = 123 + + store := state.NewStateStore(nil) + + authz := policyAuthorizer(t, ` + service_prefix "dc2-" { policy = "read" } + node_prefix "dc2-" { policy = "read" } + + service_prefix "dc3-" { policy = "read" } + node_prefix "dc3-" { policy = "read" } + `) + + require.NoError(t, store.FederationStateSet(index, &structs.FederationState{ + Datacenter: "dc2", + MeshGateways: structs.CheckServiceNodes{ + { + Service: &structs.NodeService{Service: "dc2-gw1"}, + Node: &structs.Node{Node: "dc2-gw1"}, + }, + }, + })) + + // No access to this DC, we shouldn't see it in results. 
+ require.NoError(t, store.FederationStateSet(index, &structs.FederationState{ + Datacenter: "dc4", + MeshGateways: structs.CheckServiceNodes{ + { + Service: &structs.NodeService{Service: "dc4-gw1"}, + Node: &structs.Node{Node: "dc4-gw1"}, + }, + }, + })) + + dataSource := ServerFederationStateListMeshGateways(ServerDataSourceDeps{ + ACLResolver: newStaticResolver(authz), + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(context.Background(), &structs.DCSpecificRequest{Datacenter: "dc1"}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.DatacenterIndexedCheckServiceNodes](t, eventCh) + require.Equal(t, map[string]structs.CheckServiceNodes{ + "dc2": { + { + Service: &structs.NodeService{Service: "dc2-gw1"}, + Node: &structs.Node{Node: "dc2-gw1"}, + }, + }, + }, result.DatacenterNodes) + }) + + testutil.RunStep(t, "add new datacenter", func(t *testing.T) { + require.NoError(t, store.FederationStateSet(index+1, &structs.FederationState{ + Datacenter: "dc3", + MeshGateways: structs.CheckServiceNodes{ + { + Service: &structs.NodeService{Service: "dc3-gw1"}, + Node: &structs.Node{Node: "dc3-gw1"}, + }, + }, + })) + + result := getEventResult[*structs.DatacenterIndexedCheckServiceNodes](t, eventCh) + require.Equal(t, map[string]structs.CheckServiceNodes{ + "dc2": { + { + Service: &structs.NodeService{Service: "dc2-gw1"}, + Node: &structs.Node{Node: "dc2-gw1"}, + }, + }, + "dc3": { + { + Service: &structs.NodeService{Service: "dc3-gw1"}, + Node: &structs.Node{Node: "dc3-gw1"}, + }, + }, + }, result.DatacenterNodes) + }) + + testutil.RunStep(t, "delete datacenter", func(t *testing.T) { + require.NoError(t, store.FederationStateDelete(index+2, "dc3")) + + result := getEventResult[*structs.DatacenterIndexedCheckServiceNodes](t, eventCh) + require.NotContains(t, result.DatacenterNodes, "dc3") + }) +} diff --git a/agent/proxycfg-glue/gateway_services.go b/agent/proxycfg-glue/gateway_services.go new file mode 100644 index 000000000..8c90f949d --- /dev/null +++ b/agent/proxycfg-glue/gateway_services.go @@ -0,0 +1,63 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/structs/aclfilter" +) + +// CacheGatewayServices satisfies the proxycfg.GatewayServices interface by +// sourcing data from the agent cache. +func CacheGatewayServices(c *cache.Cache) proxycfg.GatewayServices { + return &cacheProxyDataSource[*structs.ServiceSpecificRequest]{c, cachetype.GatewayServicesName} +} + +// ServerGatewayServices satisfies the proxycfg.GatewayServices interface by +// sourcing data from a blocking query against the server's state store. 
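Note: the tests below exercise two distinct ACL outcomes that are easy to conflate: a token with no read access on the gateway itself produces a permission-denied error on the event, while linked services the token cannot read are silently dropped from an otherwise successful result by the ACL filter. A small, hypothetical consumer-side sketch of telling the two apart:

```go
package proxycfgglue

import (
	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/proxycfg"
)

// describeGatewayServicesEvent is an illustrative helper, not part of this
// change, summarizing how a consumer might interpret an update event.
func describeGatewayServicesEvent(event proxycfg.UpdateEvent) string {
	switch {
	case acl.IsErrPermissionDenied(event.Err):
		return "token cannot read the gateway itself"
	case event.Err != nil:
		return "error: " + event.Err.Error()
	default:
		// Individual linked services the token cannot read never appear in
		// event.Result; they are removed by the ACL filter before delivery.
		return "ok"
	}
}
```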
+func ServerGatewayServices(deps ServerDataSourceDeps) proxycfg.GatewayServices { + return &serverGatewayServices{deps} +} + +type serverGatewayServices struct { + deps ServerDataSourceDeps +} + +func (s *serverGatewayServices) Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.IndexedGatewayServices, error) { + var authzContext acl.AuthorizerContext + authz, err := s.deps.ACLResolver.ResolveTokenAndDefaultMeta(req.Token, &req.EnterpriseMeta, &authzContext) + if err != nil { + return 0, nil, err + } + if err := authz.ToAllowAuthorizer().ServiceReadAllowed(req.ServiceName, &authzContext); err != nil { + return 0, nil, err + } + + index, services, err := store.GatewayServices(ws, req.ServiceName, &req.EnterpriseMeta) + if err != nil { + return 0, nil, err + } + + response := &structs.IndexedGatewayServices{ + Services: services, + QueryMeta: structs.QueryMeta{ + Backend: structs.QueryBackendBlocking, + Index: index, + }, + } + aclfilter.New(authz, s.deps.Logger).Filter(response) + + return index, response, nil + }, + dispatchBlockingQueryUpdate[*structs.IndexedGatewayServices](ch), + ) +} diff --git a/agent/proxycfg-glue/gateway_services_test.go b/agent/proxycfg-glue/gateway_services_test.go new file mode 100644 index 000000000..bb20f489d --- /dev/null +++ b/agent/proxycfg-glue/gateway_services_test.go @@ -0,0 +1,155 @@ +package proxycfgglue + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerGatewayServices(t *testing.T) { + const index uint64 = 123 + + t.Run("ingress gateway", func(t *testing.T) { + store := state.NewStateStore(nil) + + authz := policyAuthorizer(t, ` + service "igw" { policy = "read" } + service "web" { policy = "read" } + service "db" { policy = "read" } + `) + + require.NoError(t, store.EnsureConfigEntry(index, &structs.IngressGatewayConfigEntry{ + Name: "igw", + Listeners: []structs.IngressListener{ + { + Protocol: "tcp", + Services: []structs.IngressService{ + {Name: "web"}, + }, + }, + { + Protocol: "tcp", + Services: []structs.IngressService{ + {Name: "db"}, + }, + }, + { + Protocol: "tcp", + Services: []structs.IngressService{ + {Name: "no-access"}, + }, + }, + }, + })) + + dataSource := ServerGatewayServices(ServerDataSourceDeps{ + ACLResolver: newStaticResolver(authz), + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(context.Background(), &structs.ServiceSpecificRequest{ServiceName: "igw"}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedGatewayServices](t, eventCh) + require.Len(t, result.Services, 2) + }) + + testutil.RunStep(t, "remove service mapping", func(t *testing.T) { + require.NoError(t, store.EnsureConfigEntry(index+1, &structs.IngressGatewayConfigEntry{ + Name: "igw", + Listeners: []structs.IngressListener{ + { + Protocol: "tcp", + Services: []structs.IngressService{ + {Name: "web"}, + }, + }, + }, + })) + + result := getEventResult[*structs.IndexedGatewayServices](t, eventCh) + require.Len(t, result.Services, 1) + }) + }) + + 
t.Run("terminating gateway", func(t *testing.T) { + store := state.NewStateStore(nil) + + authz := policyAuthorizer(t, ` + service "tgw" { policy = "read" } + service "web" { policy = "read" } + service "db" { policy = "read" } + `) + + require.NoError(t, store.EnsureConfigEntry(index, &structs.TerminatingGatewayConfigEntry{ + Name: "tgw", + Services: []structs.LinkedService{ + {Name: "web"}, + {Name: "db"}, + {Name: "no-access"}, + }, + })) + + dataSource := ServerGatewayServices(ServerDataSourceDeps{ + ACLResolver: newStaticResolver(authz), + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(context.Background(), &structs.ServiceSpecificRequest{ServiceName: "tgw"}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedGatewayServices](t, eventCh) + require.Len(t, result.Services, 2) + }) + + testutil.RunStep(t, "remove service mapping", func(t *testing.T) { + require.NoError(t, store.EnsureConfigEntry(index+1, &structs.TerminatingGatewayConfigEntry{ + Name: "tgw", + Services: []structs.LinkedService{ + {Name: "web"}, + }, + })) + + result := getEventResult[*structs.IndexedGatewayServices](t, eventCh) + require.Len(t, result.Services, 1) + }) + }) + + t.Run("no access to gateway", func(t *testing.T) { + store := state.NewStateStore(nil) + + authz := policyAuthorizer(t, ` + service "tgw" { policy = "deny" } + service "web" { policy = "read" } + service "db" { policy = "read" } + `) + + require.NoError(t, store.EnsureConfigEntry(index, &structs.TerminatingGatewayConfigEntry{ + Name: "tgw", + Services: []structs.LinkedService{ + {Name: "web"}, + {Name: "db"}, + }, + })) + + dataSource := ServerGatewayServices(ServerDataSourceDeps{ + ACLResolver: newStaticResolver(authz), + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(context.Background(), &structs.ServiceSpecificRequest{ServiceName: "tgw"}, "", eventCh)) + + err := getEventError(t, eventCh) + require.True(t, acl.IsErrPermissionDenied(err), "expected permission denied error") + }) +} diff --git a/agent/proxycfg-glue/glue.go b/agent/proxycfg-glue/glue.go index 0fb0b7752..86badf67e 100644 --- a/agent/proxycfg-glue/glue.go +++ b/agent/proxycfg-glue/glue.go @@ -3,23 +3,33 @@ package proxycfgglue import ( "context" + "github.com/hashicorp/consul/proto/pbpeering" "github.com/hashicorp/go-memdb" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/cache" cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/configentry" + "github.com/hashicorp/consul/agent/consul/discoverychain" + "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/watch" "github.com/hashicorp/consul/agent/proxycfg" - "github.com/hashicorp/consul/agent/rpcclient/health" "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/proto/pbpeering" ) // Store is the state store interface required for server-local data sources. 
type Store interface { watch.StateStore + ExportedServicesForAllPeersByName(ws memdb.WatchSet, entMeta acl.EnterpriseMeta) (uint64, map[string]structs.ServiceList, error) + FederationStateList(ws memdb.WatchSet) (uint64, []*structs.FederationState, error) + GatewayServices(ws memdb.WatchSet, gateway string, entMeta *acl.EnterpriseMeta) (uint64, structs.GatewayServices, error) IntentionTopology(ws memdb.WatchSet, target structs.ServiceName, downstreams bool, defaultDecision acl.EnforcementDecision, intentionTarget structs.IntentionTargetType) (uint64, structs.ServiceList, error) + ServiceDiscoveryChain(ws memdb.WatchSet, serviceName string, entMeta *acl.EnterpriseMeta, req discoverychain.CompileRequest) (uint64, *structs.CompiledDiscoveryChain, *configentry.DiscoveryChainSet, error) + PeeringTrustBundleRead(ws memdb.WatchSet, q state.Query) (uint64, *pbpeering.PeeringTrustBundle, error) + PeeringTrustBundleList(ws memdb.WatchSet, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.PeeringTrustBundle, error) + TrustBundleListByService(ws memdb.WatchSet, service, dc string, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.PeeringTrustBundle, error) + VirtualIPsForAllImportedServices(ws memdb.WatchSet, entMeta acl.EnterpriseMeta) (uint64, []state.ServiceVirtualIP, error) } // CacheCARoots satisfies the proxycfg.CARoots interface by sourcing data from @@ -28,12 +38,6 @@ func CacheCARoots(c *cache.Cache) proxycfg.CARoots { return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.ConnectCARootName} } -// CacheCompiledDiscoveryChain satisfies the proxycfg.CompiledDiscoveryChain -// interface by sourcing data from the agent cache. -func CacheCompiledDiscoveryChain(c *cache.Cache) proxycfg.CompiledDiscoveryChain { - return &cacheProxyDataSource[*structs.DiscoveryChainRequest]{c, cachetype.CompiledDiscoveryChainName} -} - // CacheConfigEntry satisfies the proxycfg.ConfigEntry interface by sourcing // data from the agent cache. func CacheConfigEntry(c *cache.Cache) proxycfg.ConfigEntry { @@ -52,16 +56,10 @@ func CacheDatacenters(c *cache.Cache) proxycfg.Datacenters { return &cacheProxyDataSource[*structs.DatacentersRequest]{c, cachetype.CatalogDatacentersName} } -// CacheFederationStateListMeshGateways satisfies the proxycfg.FederationStateListMeshGateways -// interface by sourcing data from the agent cache. -func CacheFederationStateListMeshGateways(c *cache.Cache) proxycfg.FederationStateListMeshGateways { - return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.FederationStateListMeshGatewaysName} -} - -// CacheGatewayServices satisfies the proxycfg.GatewayServices interface by +// CacheServiceGateways satisfies the proxycfg.ServiceGateways interface by // sourcing data from the agent cache. -func CacheGatewayServices(c *cache.Cache) proxycfg.GatewayServices { - return &cacheProxyDataSource[*structs.ServiceSpecificRequest]{c, cachetype.GatewayServicesName} +func CacheServiceGateways(c *cache.Cache) proxycfg.GatewayServices { + return &cacheProxyDataSource[*structs.ServiceSpecificRequest]{c, cachetype.ServiceGatewaysName} } // CacheHTTPChecks satisifies the proxycfg.HTTPChecks interface by sourcing @@ -76,6 +74,12 @@ func CacheIntentionUpstreams(c *cache.Cache) proxycfg.IntentionUpstreams { return &cacheProxyDataSource[*structs.ServiceSpecificRequest]{c, cachetype.IntentionUpstreamsName} } +// CacheIntentionUpstreamsDestination satisfies the proxycfg.IntentionUpstreamsDestination interface +// by sourcing data from the agent cache. 
+func CacheIntentionUpstreamsDestination(c *cache.Cache) proxycfg.IntentionUpstreams { + return &cacheProxyDataSource[*structs.ServiceSpecificRequest]{c, cachetype.IntentionUpstreamsDestinationName} +} + // CacheInternalServiceDump satisfies the proxycfg.InternalServiceDump // interface by sourcing data from the agent cache. func CacheInternalServiceDump(c *cache.Cache) proxycfg.InternalServiceDump { @@ -88,12 +92,6 @@ func CacheLeafCertificate(c *cache.Cache) proxycfg.LeafCertificate { return &cacheProxyDataSource[*cachetype.ConnectCALeafRequest]{c, cachetype.ConnectCALeafName} } -// CachePeeredUpstreams satisfies the proxycfg.PeeredUpstreams interface -// by sourcing data from the agent cache. -func CachePeeredUpstreams(c *cache.Cache) proxycfg.PeeredUpstreams { - return &cacheProxyDataSource[*structs.PartitionSpecificRequest]{c, cachetype.PeeredUpstreamsName} -} - // CachePrepraredQuery satisfies the proxycfg.PreparedQuery interface by // sourcing data from the agent cache. func CachePrepraredQuery(c *cache.Cache) proxycfg.PreparedQuery { @@ -106,30 +104,6 @@ func CacheResolvedServiceConfig(c *cache.Cache) proxycfg.ResolvedServiceConfig { return &cacheProxyDataSource[*structs.ServiceConfigRequest]{c, cachetype.ResolvedServiceConfigName} } -// CacheServiceList satisfies the proxycfg.ServiceList interface by sourcing -// data from the agent cache. -func CacheServiceList(c *cache.Cache) proxycfg.ServiceList { - return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.CatalogServiceListName} -} - -// CacheTrustBundle satisfies the proxycfg.TrustBundle interface by sourcing -// data from the agent cache. -func CacheTrustBundle(c *cache.Cache) proxycfg.TrustBundle { - return &cacheProxyDataSource[*pbpeering.TrustBundleReadRequest]{c, cachetype.TrustBundleReadName} -} - -// CacheTrustBundleList satisfies the proxycfg.TrustBundleList interface by sourcing -// data from the agent cache. -func CacheTrustBundleList(c *cache.Cache) proxycfg.TrustBundleList { - return &cacheProxyDataSource[*pbpeering.TrustBundleListByServiceRequest]{c, cachetype.TrustBundleListName} -} - -// CacheExportedPeeredServices satisfies the proxycfg.ExportedPeeredServices -// interface by sourcing data from the agent cache. -func CacheExportedPeeredServices(c *cache.Cache) proxycfg.ExportedPeeredServices { - return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.ExportedPeeredServicesName} -} - // cacheProxyDataSource implements a generic wrapper around the agent cache to // provide data to the proxycfg.Manager. type cacheProxyDataSource[ReqType cache.Request] struct { @@ -148,25 +122,6 @@ func (c *cacheProxyDataSource[ReqType]) Notify( return c.c.NotifyCallback(ctx, c.t, req, correlationID, dispatchCacheUpdate(ch)) } -// Health wraps health.Client so that the proxycfg package doesn't need to -// reference cache.UpdateEvent directly. 
-func Health(client *health.Client) proxycfg.Health { - return &healthWrapper{client} -} - -type healthWrapper struct { - client *health.Client -} - -func (h *healthWrapper) Notify( - ctx context.Context, - req *structs.ServiceSpecificRequest, - correlationID string, - ch chan<- proxycfg.UpdateEvent, -) error { - return h.client.Notify(ctx, *req, correlationID, dispatchCacheUpdate(ch)) -} - func dispatchCacheUpdate(ch chan<- proxycfg.UpdateEvent) cache.Callback { return func(ctx context.Context, e cache.UpdateEvent) { u := proxycfg.UpdateEvent{ diff --git a/agent/proxycfg-glue/health.go b/agent/proxycfg-glue/health.go new file mode 100644 index 000000000..331c8012b --- /dev/null +++ b/agent/proxycfg-glue/health.go @@ -0,0 +1,82 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/consul/agent/cache" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/rpcclient/health" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/submatview" +) + +// ClientHealth satisfies the proxycfg.Health interface by sourcing data from +// the given health.Client. +func ClientHealth(client *health.Client) proxycfg.Health { + return &clientHealth{client} +} + +type clientHealth struct { + client *health.Client +} + +func (h *clientHealth) Notify( + ctx context.Context, + req *structs.ServiceSpecificRequest, + correlationID string, + ch chan<- proxycfg.UpdateEvent, +) error { + return h.client.Notify(ctx, *req, correlationID, dispatchCacheUpdate(ch)) +} + +// ServerHealth satisfies the proxycfg.Health interface by sourcing data from +// a local materialized view (backed by an EventPublisher subscription). +// +// Requests for services in remote datacenters will be delegated to the given +// remoteSource (i.e. ClientHealth). 
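Note: the comment above describes the materialized-view path: rather than running a blocking query, the request is handed to the ViewStore, which builds (or reuses) an event-driven view fed by the EventPublisher. Judging only from the three methods healthRequest implements further down, the view store appears to need roughly the following from a request; this is an inferred sketch, not the submatview package's actual interface definition.

```go
package proxycfgglue

import (
	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/submatview"
)

// materializedRequest sketches the contract healthRequest appears to fulfil.
type materializedRequest interface {
	// CacheInfo identifies the request so equivalent watchers can share a view.
	CacheInfo() cache.RequestInfo
	// NewMaterializer builds the event-driven view the first time it is needed.
	NewMaterializer() (submatview.Materializer, error)
	// Type names the request kind, e.g. "proxycfgglue.Health".
	Type() string
}
```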
+func ServerHealth(deps ServerDataSourceDeps, remoteSource proxycfg.Health) proxycfg.Health { + return &serverHealth{deps, remoteSource} +} + +type serverHealth struct { + deps ServerDataSourceDeps + remoteSource proxycfg.Health +} + +func (h *serverHealth) Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + if req.Datacenter != h.deps.Datacenter { + return h.remoteSource.Notify(ctx, req, correlationID, ch) + } + + return h.deps.ViewStore.NotifyCallback( + ctx, + &healthRequest{h.deps, *req}, + correlationID, + dispatchCacheUpdate(ch), + ) +} + +type healthRequest struct { + deps ServerDataSourceDeps + req structs.ServiceSpecificRequest +} + +func (r *healthRequest) CacheInfo() cache.RequestInfo { return r.req.CacheInfo() } + +func (r *healthRequest) NewMaterializer() (submatview.Materializer, error) { + view, err := health.NewHealthView(r.req) + if err != nil { + return nil, err + } + return submatview.NewLocalMaterializer(submatview.LocalMaterializerDeps{ + Backend: r.deps.EventPublisher, + ACLResolver: r.deps.ACLResolver, + Deps: submatview.Deps{ + View: view, + Logger: r.deps.Logger, + Request: health.NewMaterializerRequest(r.req), + }, + }), nil +} + +func (r *healthRequest) Type() string { return "proxycfgglue.Health" } diff --git a/agent/proxycfg-glue/health_test.go b/agent/proxycfg-glue/health_test.go new file mode 100644 index 000000000..b4e6035ee --- /dev/null +++ b/agent/proxycfg-glue/health_test.go @@ -0,0 +1,149 @@ +package proxycfgglue + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/consul/stream" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/submatview" + "github.com/hashicorp/consul/proto/pbsubscribe" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerHealth(t *testing.T) { + t.Run("remote queries are delegated to the remote source", func(t *testing.T) { + var ( + ctx = context.Background() + req = &structs.ServiceSpecificRequest{Datacenter: "dc2"} + correlationID = "correlation-id" + ch = make(chan<- proxycfg.UpdateEvent) + result = errors.New("KABOOM") + ) + + remoteSource := newMockHealth(t) + remoteSource.On("Notify", ctx, req, correlationID, ch).Return(result) + + dataSource := ServerHealth(ServerDataSourceDeps{Datacenter: "dc1"}, remoteSource) + err := dataSource.Notify(ctx, req, correlationID, ch) + require.Equal(t, result, err) + }) + + t.Run("local queries are served from a materialized view", func(t *testing.T) { + // Note: the view is tested more thoroughly in the agent/rpcclient/health + // package, so this is more of a high-level integration test with the local + // materializer. 
+ const ( + index uint64 = 123 + datacenter = "dc1" + serviceName = "web" + ) + + logger := testutil.Logger(t) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + store := submatview.NewStore(logger) + go store.Run(ctx) + + publisher := stream.NewEventPublisher(10 * time.Second) + publisher.RegisterHandler(pbsubscribe.Topic_ServiceHealth, + func(stream.SubscribeRequest, stream.SnapshotAppender) (uint64, error) { return index, nil }, + true) + go publisher.Run(ctx) + + dataSource := ServerHealth(ServerDataSourceDeps{ + Datacenter: datacenter, + ACLResolver: newStaticResolver(acl.ManageAll()), + ViewStore: store, + EventPublisher: publisher, + Logger: logger, + }, nil) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ + Datacenter: datacenter, + ServiceName: serviceName, + }, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Empty(t, result.Nodes) + }) + + testutil.RunStep(t, "register services", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Index: index + 1, + Topic: pbsubscribe.Topic_ServiceHealth, + Payload: &state.EventPayloadCheckServiceNode{ + Op: pbsubscribe.CatalogOp_Register, + Value: &structs.CheckServiceNode{ + Node: &structs.Node{Node: "node1"}, + Service: &structs.NodeService{Service: serviceName}, + }, + }, + }, + { + Index: index + 1, + Topic: pbsubscribe.Topic_ServiceHealth, + Payload: &state.EventPayloadCheckServiceNode{ + Op: pbsubscribe.CatalogOp_Register, + Value: &structs.CheckServiceNode{ + Node: &structs.Node{Node: "node2"}, + Service: &structs.NodeService{Service: serviceName}, + }, + }, + }, + }) + + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Len(t, result.Nodes, 2) + }) + + testutil.RunStep(t, "deregister service", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Index: index + 2, + Topic: pbsubscribe.Topic_ServiceHealth, + Payload: &state.EventPayloadCheckServiceNode{ + Op: pbsubscribe.CatalogOp_Deregister, + Value: &structs.CheckServiceNode{ + Node: &structs.Node{Node: "node2"}, + Service: &structs.NodeService{Service: serviceName}, + }, + }, + }, + }) + + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Len(t, result.Nodes, 1) + }) + }) +} + +func newMockHealth(t *testing.T) *mockHealth { + mock := &mockHealth{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +type mockHealth struct { + mock.Mock +} + +func (m *mockHealth) Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return m.Called(ctx, req, correlationID, ch).Error(0) +} diff --git a/agent/proxycfg-glue/helpers_test.go b/agent/proxycfg-glue/helpers_test.go new file mode 100644 index 000000000..7a0c67df1 --- /dev/null +++ b/agent/proxycfg-glue/helpers_test.go @@ -0,0 +1,56 @@ +package proxycfgglue + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/agent/proxycfg" +) + +func indexGenerator() func() uint64 { + var idx uint64 + return func() uint64 { + idx++ + return idx + } +} + +func getEventResult[ResultType any](t *testing.T, eventCh <-chan proxycfg.UpdateEvent) ResultType { + t.Helper() + + select { + case event := <-eventCh: + require.NoError(t, event.Err, "event should not have an error") + result, ok := 
event.Result.(ResultType) + require.Truef(t, ok, "unexpected result type: %T", event.Result) + return result + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for event") + } + + panic("this should never be reached") +} + +func expectNoEvent(t *testing.T, eventCh <-chan proxycfg.UpdateEvent) { + select { + case <-eventCh: + t.Fatal("expected no event") + case <-time.After(100 * time.Millisecond): + } +} + +func getEventError(t *testing.T, eventCh <-chan proxycfg.UpdateEvent) error { + t.Helper() + + select { + case event := <-eventCh: + require.Error(t, event.Err) + return event.Err + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for event") + } + + panic("this should never be reached") +} diff --git a/agent/proxycfg-glue/intention_upstreams_test.go b/agent/proxycfg-glue/intention_upstreams_test.go index 22d596109..22846f24d 100644 --- a/agent/proxycfg-glue/intention_upstreams_test.go +++ b/agent/proxycfg-glue/intention_upstreams_test.go @@ -3,7 +3,6 @@ package proxycfgglue import ( "context" "testing" - "time" "github.com/stretchr/testify/require" @@ -62,7 +61,7 @@ func TestServerIntentionUpstreams(t *testing.T) { authz := policyAuthorizer(t, `service "db" { policy = "read" }`) dataSource := ServerIntentionUpstreams(ServerDataSourceDeps{ - ACLResolver: staticResolver{authz}, + ACLResolver: newStaticResolver(authz), GetStore: func() Store { return store }, }) @@ -70,28 +69,16 @@ func TestServerIntentionUpstreams(t *testing.T) { err := dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ServiceName: serviceName}, "", ch) require.NoError(t, err) - select { - case event := <-ch: - result, ok := event.Result.(*structs.IndexedServiceList) - require.Truef(t, ok, "expected IndexedServiceList, got: %T", event.Result) - require.Len(t, result.Services, 0) - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + result := getEventResult[*structs.IndexedServiceList](t, ch) + require.Len(t, result.Services, 0) // Create an allow intention for the db service. This should *not* be filtered // out because the ACL token *does* have read access on it. 
createIntention("db") - select { - case event := <-ch: - result, ok := event.Result.(*structs.IndexedServiceList) - require.Truef(t, ok, "expected IndexedServiceList, got: %T", event.Result) - require.Len(t, result.Services, 1) - require.Equal(t, "db", result.Services[0].Name) - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + result = getEventResult[*structs.IndexedServiceList](t, ch) + require.Len(t, result.Services, 1) + require.Equal(t, "db", result.Services[0].Name) } func disableLegacyIntentions(t *testing.T, store *state.Store) { diff --git a/agent/proxycfg-glue/intentions_ent_test.go b/agent/proxycfg-glue/intentions_ent_test.go index 66f3d62cb..00eb37285 100644 --- a/agent/proxycfg-glue/intentions_ent_test.go +++ b/agent/proxycfg-glue/intentions_ent_test.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/submatview" "github.com/hashicorp/consul/proto/pbsubscribe" + "github.com/hashicorp/consul/sdk/testutil" ) func TestServerIntentions_Enterprise(t *testing.T) { @@ -39,7 +40,7 @@ func TestServerIntentions_Enterprise(t *testing.T) { go publisher.Run(ctx) intentions := ServerIntentions(ServerDataSourceDeps{ - ACLResolver: staticResolver{acl.ManageAll()}, + ACLResolver: newStaticResolver(acl.ManageAll()), ViewStore: store, EventPublisher: publisher, Logger: logger, @@ -51,37 +52,29 @@ func TestServerIntentions_Enterprise(t *testing.T) { ServiceName: serviceName, }, "", eventCh)) - // Wait for the initial snapshots. - select { - case <-eventCh: - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + testutil.RunStep(t, "initial snapshot", func(t *testing.T) { + getEventResult[structs.Intentions](t, eventCh) + }) - // Publish a namespace wildcard intention. 
- publisher.Publish([]stream.Event{ - { - Topic: pbsubscribe.Topic_ServiceIntentions, - Index: index + 1, - Payload: state.EventPayloadConfigEntry{ - Op: pbsubscribe.ConfigEntryUpdate_Upsert, - Value: &structs.ServiceIntentionsConfigEntry{ - Name: structs.WildcardSpecifier, - EnterpriseMeta: *acl.WildcardEnterpriseMeta(), - Sources: []*structs.SourceIntention{ - {Name: structs.WildcardSpecifier, Action: structs.IntentionActionAllow, Precedence: 1}, + testutil.RunStep(t, "publish a namespace-wildcard partition", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Topic: pbsubscribe.Topic_ServiceIntentions, + Index: index + 1, + Payload: state.EventPayloadConfigEntry{ + Op: pbsubscribe.ConfigEntryUpdate_Upsert, + Value: &structs.ServiceIntentionsConfigEntry{ + Name: structs.WildcardSpecifier, + EnterpriseMeta: *acl.WildcardEnterpriseMeta(), + Sources: []*structs.SourceIntention{ + {Name: structs.WildcardSpecifier, Action: structs.IntentionActionAllow, Precedence: 1}, + }, }, }, }, - }, - }) + }) - select { - case event := <-eventCh: - result, ok := event.Result.(structs.Intentions) - require.Truef(t, ok, "expected Intentions, got: %T", event.Result) + result := getEventResult[structs.Intentions](t, eventCh) require.Len(t, result, 1) - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + }) } diff --git a/agent/proxycfg-glue/intentions_test.go b/agent/proxycfg-glue/intentions_test.go index 0284068bd..3597109f7 100644 --- a/agent/proxycfg-glue/intentions_test.go +++ b/agent/proxycfg-glue/intentions_test.go @@ -2,6 +2,7 @@ package proxycfgglue import ( "context" + "sync" "testing" "time" @@ -16,6 +17,7 @@ import ( "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/submatview" "github.com/hashicorp/consul/proto/pbsubscribe" + "github.com/hashicorp/consul/sdk/testutil" ) func TestServerIntentions(t *testing.T) { @@ -39,7 +41,7 @@ func TestServerIntentions(t *testing.T) { go publisher.Run(ctx) intentions := ServerIntentions(ServerDataSourceDeps{ - ACLResolver: staticResolver{acl.ManageAll()}, + ACLResolver: newStaticResolver(acl.ManageAll()), ViewStore: store, EventPublisher: publisher, Logger: logger, @@ -51,64 +53,53 @@ func TestServerIntentions(t *testing.T) { EnterpriseMeta: *acl.DefaultEnterpriseMeta(), }, "", eventCh)) - // Wait for the initial snapshots. - select { - case <-eventCh: - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + testutil.RunStep(t, "initial snapshot", func(t *testing.T) { + getEventResult[structs.Intentions](t, eventCh) + }) - // Publish an explicit intention on the service. 
- publisher.Publish([]stream.Event{ - { - Topic: pbsubscribe.Topic_ServiceIntentions, - Index: index + 1, - Payload: state.EventPayloadConfigEntry{ - Op: pbsubscribe.ConfigEntryUpdate_Upsert, - Value: &structs.ServiceIntentionsConfigEntry{ - Name: serviceName, - Sources: []*structs.SourceIntention{ - {Name: "db", Action: structs.IntentionActionAllow, Precedence: 1}, + testutil.RunStep(t, "publishing an explicit intention", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Topic: pbsubscribe.Topic_ServiceIntentions, + Index: index + 1, + Payload: state.EventPayloadConfigEntry{ + Op: pbsubscribe.ConfigEntryUpdate_Upsert, + Value: &structs.ServiceIntentionsConfigEntry{ + Name: serviceName, + Sources: []*structs.SourceIntention{ + {Name: "db", Action: structs.IntentionActionAllow, Precedence: 1}, + }, }, }, }, - }, - }) + }) - select { - case event := <-eventCh: - result, ok := event.Result.(structs.Intentions) - require.Truef(t, ok, "expected Intentions, got: %T", event.Result) + result := getEventResult[structs.Intentions](t, eventCh) require.Len(t, result, 1) intention := result[0] require.Equal(t, intention.DestinationName, serviceName) require.Equal(t, intention.SourceName, "db") - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + }) - // Publish a wildcard intention. - publisher.Publish([]stream.Event{ - { - Topic: pbsubscribe.Topic_ServiceIntentions, - Index: index + 2, - Payload: state.EventPayloadConfigEntry{ - Op: pbsubscribe.ConfigEntryUpdate_Upsert, - Value: &structs.ServiceIntentionsConfigEntry{ - Name: structs.WildcardSpecifier, - Sources: []*structs.SourceIntention{ - {Name: structs.WildcardSpecifier, Action: structs.IntentionActionAllow, Precedence: 0}, + testutil.RunStep(t, "publishing a wildcard intention", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Topic: pbsubscribe.Topic_ServiceIntentions, + Index: index + 2, + Payload: state.EventPayloadConfigEntry{ + Op: pbsubscribe.ConfigEntryUpdate_Upsert, + Value: &structs.ServiceIntentionsConfigEntry{ + Name: structs.WildcardSpecifier, + Sources: []*structs.SourceIntention{ + {Name: structs.WildcardSpecifier, Action: structs.IntentionActionAllow, Precedence: 0}, + }, }, }, }, - }, - }) + }) - select { - case event := <-eventCh: - result, ok := event.Result.(structs.Intentions) - require.Truef(t, ok, "expected Intentions, got: %T", event.Result) + result := getEventResult[structs.Intentions](t, eventCh) require.Len(t, result, 2) a := result[0] @@ -118,38 +109,48 @@ func TestServerIntentions(t *testing.T) { b := result[1] require.Equal(t, b.DestinationName, structs.WildcardSpecifier) require.Equal(t, b.SourceName, structs.WildcardSpecifier) - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } - - // Publish a delete event and observe the intention is removed from the results. 
- publisher.Publish([]stream.Event{ - { - Topic: pbsubscribe.Topic_ServiceIntentions, - Index: index + 3, - Payload: state.EventPayloadConfigEntry{ - Op: pbsubscribe.ConfigEntryUpdate_Delete, - Value: &structs.ServiceIntentionsConfigEntry{ - Name: serviceName, - }, - }, - }, }) - select { - case event := <-eventCh: - result, ok := event.Result.(structs.Intentions) - require.Truef(t, ok, "expected Intentions, got: %T", event.Result) + testutil.RunStep(t, "publishing a delete event", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Topic: pbsubscribe.Topic_ServiceIntentions, + Index: index + 3, + Payload: state.EventPayloadConfigEntry{ + Op: pbsubscribe.ConfigEntryUpdate_Delete, + Value: &structs.ServiceIntentionsConfigEntry{ + Name: serviceName, + }, + }, + }, + }) + + result := getEventResult[structs.Intentions](t, eventCh) require.Len(t, result, 1) - case <-time.After(100 * time.Millisecond): - t.Fatal("timeout waiting for event") - } + }) + } type staticResolver struct { + mu sync.Mutex authorizer acl.Authorizer } -func (r staticResolver) ResolveTokenAndDefaultMeta(token string, entMeta *acl.EnterpriseMeta, authzContext *acl.AuthorizerContext) (resolver.Result, error) { +func newStaticResolver(authz acl.Authorizer) *staticResolver { + resolver := new(staticResolver) + resolver.SwapAuthorizer(authz) + return resolver +} + +func (r *staticResolver) SwapAuthorizer(authz acl.Authorizer) { + r.mu.Lock() + defer r.mu.Unlock() + + r.authorizer = authz +} + +func (r *staticResolver) ResolveTokenAndDefaultMeta(token string, entMeta *acl.EnterpriseMeta, authzContext *acl.AuthorizerContext) (resolver.Result, error) { + r.mu.Lock() + defer r.mu.Unlock() return resolver.Result{Authorizer: r.authorizer}, nil } diff --git a/agent/proxycfg-glue/peered_upstreams.go b/agent/proxycfg-glue/peered_upstreams.go new file mode 100644 index 000000000..4d3e85f81 --- /dev/null +++ b/agent/proxycfg-glue/peered_upstreams.go @@ -0,0 +1,55 @@ +package proxycfgglue + +import ( + "context" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" +) + +// CachePeeredUpstreams satisfies the proxycfg.PeeredUpstreams interface +// by sourcing data from the agent cache. +func CachePeeredUpstreams(c *cache.Cache) proxycfg.PeeredUpstreams { + return &cacheProxyDataSource[*structs.PartitionSpecificRequest]{c, cachetype.PeeredUpstreamsName} +} + +// ServerPeeredUpstreams satisfies the proxycfg.PeeredUpstreams interface by +// sourcing data from a blocking query against the server's state store. +func ServerPeeredUpstreams(deps ServerDataSourceDeps) proxycfg.PeeredUpstreams { + return &serverPeeredUpstreams{deps} +} + +type serverPeeredUpstreams struct { + deps ServerDataSourceDeps +} + +func (s *serverPeeredUpstreams) Notify(ctx context.Context, req *structs.PartitionSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + // TODO(peering): ACL filtering. 
+ return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.IndexedPeeredServiceList, error) { + index, vips, err := store.VirtualIPsForAllImportedServices(ws, req.EnterpriseMeta) + if err != nil { + return 0, nil, err + } + + result := make([]structs.PeeredServiceName, 0, len(vips)) + for _, vip := range vips { + result = append(result, vip.Service) + } + + return index, &structs.IndexedPeeredServiceList{ + Services: result, + QueryMeta: structs.QueryMeta{ + Index: index, + Backend: structs.QueryBackendBlocking, + }, + }, nil + }, + dispatchBlockingQueryUpdate[*structs.IndexedPeeredServiceList](ch), + ) +} diff --git a/agent/proxycfg-glue/peered_upstreams_test.go b/agent/proxycfg-glue/peered_upstreams_test.go new file mode 100644 index 000000000..c2faa44da --- /dev/null +++ b/agent/proxycfg-glue/peered_upstreams_test.go @@ -0,0 +1,88 @@ +package proxycfgglue + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerPeeredUpstreams(t *testing.T) { + const ( + index uint64 = 123 + nodeName = "node-1" + ) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + store := state.NewStateStore(nil) + enableVirtualIPs(t, store) + + registerService := func(t *testing.T, index uint64, peerName, serviceName string) { + require.NoError(t, store.EnsureRegistration(index, &structs.RegisterRequest{ + Node: nodeName, + Service: &structs.NodeService{Service: serviceName, ID: serviceName}, + PeerName: peerName, + EnterpriseMeta: *acl.DefaultEnterpriseMeta(), + })) + + require.NoError(t, store.EnsureRegistration(index, &structs.RegisterRequest{ + Node: nodeName, + Service: &structs.NodeService{ + Service: fmt.Sprintf("%s-proxy", serviceName), + Kind: structs.ServiceKindConnectProxy, + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: serviceName, + }, + }, + PeerName: peerName, + EnterpriseMeta: *acl.DefaultEnterpriseMeta(), + })) + } + + registerService(t, index, "peer-1", "web") + + eventCh := make(chan proxycfg.UpdateEvent) + dataSource := ServerPeeredUpstreams(ServerDataSourceDeps{ + GetStore: func() Store { return store }, + }) + require.NoError(t, dataSource.Notify(ctx, &structs.PartitionSpecificRequest{EnterpriseMeta: *acl.DefaultEnterpriseMeta()}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedPeeredServiceList](t, eventCh) + require.Len(t, result.Services, 1) + require.Equal(t, "peer-1", result.Services[0].Peer) + require.Equal(t, "web", result.Services[0].ServiceName.Name) + }) + + testutil.RunStep(t, "register another service", func(t *testing.T) { + registerService(t, index+1, "peer-2", "db") + + result := getEventResult[*structs.IndexedPeeredServiceList](t, eventCh) + require.Len(t, result.Services, 2) + }) + + testutil.RunStep(t, "deregister service", func(t *testing.T) { + require.NoError(t, store.DeleteService(index+2, nodeName, "web", acl.DefaultEnterpriseMeta(), "peer-1")) + + result := getEventResult[*structs.IndexedPeeredServiceList](t, eventCh) + require.Len(t, result.Services, 1) + }) +} + +func enableVirtualIPs(t *testing.T, store *state.Store) { + t.Helper() + + require.NoError(t, store.SystemMetadataSet(0, &structs.SystemMetadataEntry{ + 
Key: structs.SystemMetadataVirtualIPsEnabled, + Value: "true", + })) +} diff --git a/agent/proxycfg-glue/service_list.go b/agent/proxycfg-glue/service_list.go new file mode 100644 index 000000000..14dc13f31 --- /dev/null +++ b/agent/proxycfg-glue/service_list.go @@ -0,0 +1,124 @@ +package proxycfgglue + +import ( + "context" + "sort" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/submatview" + "github.com/hashicorp/consul/proto/pbcommon" + "github.com/hashicorp/consul/proto/pbsubscribe" +) + +// CacheServiceList satisfies the proxycfg.ServiceList interface by sourcing +// data from the agent cache. +func CacheServiceList(c *cache.Cache) proxycfg.ServiceList { + return &cacheProxyDataSource[*structs.DCSpecificRequest]{c, cachetype.CatalogServiceListName} +} + +func ServerServiceList(deps ServerDataSourceDeps, remoteSource proxycfg.ServiceList) proxycfg.ServiceList { + return &serverServiceList{deps, remoteSource} +} + +type serverServiceList struct { + deps ServerDataSourceDeps + remoteSource proxycfg.ServiceList +} + +func (s *serverServiceList) Notify(ctx context.Context, req *structs.DCSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + if req.Datacenter != s.deps.Datacenter { + return s.remoteSource.Notify(ctx, req, correlationID, ch) + } + return s.deps.ViewStore.NotifyCallback( + ctx, + &serviceListRequest{s.deps, req}, + correlationID, + dispatchCacheUpdate(ch), + ) +} + +type serviceListRequest struct { + deps ServerDataSourceDeps + req *structs.DCSpecificRequest +} + +func (r *serviceListRequest) Request(index uint64) *pbsubscribe.SubscribeRequest { + return &pbsubscribe.SubscribeRequest{ + Topic: pbsubscribe.Topic_ServiceList, + Subject: &pbsubscribe.SubscribeRequest_WildcardSubject{WildcardSubject: true}, + Index: index, + Datacenter: r.req.Datacenter, + Token: r.req.QueryOptions.Token, + } +} + +func (r *serviceListRequest) CacheInfo() cache.RequestInfo { return r.req.CacheInfo() } + +func (r *serviceListRequest) NewMaterializer() (submatview.Materializer, error) { + return submatview.NewLocalMaterializer(submatview.LocalMaterializerDeps{ + Backend: r.deps.EventPublisher, + ACLResolver: r.deps.ACLResolver, + Deps: submatview.Deps{ + View: newServiceListView(r.req.EnterpriseMeta), + Logger: r.deps.Logger, + Request: r.Request, + }, + }), nil +} + +func (serviceListRequest) Type() string { return "proxycfgglue.ServiceList" } + +func newServiceListView(entMeta acl.EnterpriseMeta) *serviceListView { + view := &serviceListView{entMeta: entMeta} + view.Reset() + return view +} + +type serviceListView struct { + entMeta acl.EnterpriseMeta + state map[string]structs.ServiceName +} + +func (v *serviceListView) Reset() { v.state = make(map[string]structs.ServiceName) } + +func (v *serviceListView) Update(events []*pbsubscribe.Event) error { + for _, event := range filterByEnterpriseMeta(events, v.entMeta) { + update := event.GetService() + if update == nil { + continue + } + + var entMeta acl.EnterpriseMeta + pbcommon.EnterpriseMetaToStructs(update.EnterpriseMeta, &entMeta) + name := structs.NewServiceName(update.Name, &entMeta) + + switch update.Op { + case pbsubscribe.CatalogOp_Register: + v.state[name.String()] = name + case pbsubscribe.CatalogOp_Deregister: + delete(v.state, name.String()) + } + } + return nil +} + +func (v 
*serviceListView) Result(index uint64) any { + serviceList := make(structs.ServiceList, 0, len(v.state)) + for _, name := range v.state { + serviceList = append(serviceList, name) + } + sort.Slice(serviceList, func(a, b int) bool { + return serviceList[a].String() < serviceList[b].String() + }) + return &structs.IndexedServiceList{ + Services: serviceList, + QueryMeta: structs.QueryMeta{ + Backend: structs.QueryBackendStreaming, + Index: index, + }, + } +} diff --git a/agent/proxycfg-glue/service_list_test.go b/agent/proxycfg-glue/service_list_test.go new file mode 100644 index 000000000..eedb211b3 --- /dev/null +++ b/agent/proxycfg-glue/service_list_test.go @@ -0,0 +1,140 @@ +package proxycfgglue + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/consul/stream" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/submatview" + "github.com/hashicorp/consul/proto/pbsubscribe" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerServiceList(t *testing.T) { + t.Run("remote queries are delegated to the remote source", func(t *testing.T) { + var ( + ctx = context.Background() + req = &structs.DCSpecificRequest{Datacenter: "dc2"} + correlationID = "correlation-id" + ch = make(chan<- proxycfg.UpdateEvent) + result = errors.New("KABOOM") + ) + + remoteSource := newMockServiceList(t) + remoteSource.On("Notify", ctx, req, correlationID, ch).Return(result) + + dataSource := ServerServiceList(ServerDataSourceDeps{Datacenter: "dc1"}, remoteSource) + err := dataSource.Notify(ctx, req, correlationID, ch) + require.Equal(t, result, err) + }) + + t.Run("local queries are served from a materialized view", func(t *testing.T) { + const ( + index uint64 = 123 + datacenter = "dc1" + ) + + logger := testutil.Logger(t) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + store := submatview.NewStore(logger) + go store.Run(ctx) + + publisher := stream.NewEventPublisher(10 * time.Second) + publisher.RegisterHandler(pbsubscribe.Topic_ServiceList, + func(stream.SubscribeRequest, stream.SnapshotAppender) (uint64, error) { return index, nil }, + true) + go publisher.Run(ctx) + + dataSource := ServerServiceList(ServerDataSourceDeps{ + Datacenter: datacenter, + ACLResolver: newStaticResolver(acl.ManageAll()), + ViewStore: store, + EventPublisher: publisher, + Logger: logger, + }, nil) + + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(ctx, &structs.DCSpecificRequest{Datacenter: datacenter}, "", eventCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedServiceList](t, eventCh) + require.Empty(t, result.Services) + }) + + testutil.RunStep(t, "register services", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Index: index + 1, + Topic: pbsubscribe.Topic_ServiceList, + Payload: &state.EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Register, + Name: "web", + }, + }, + { + Index: index + 1, + Topic: pbsubscribe.Topic_ServiceList, + Payload: &state.EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Register, + Name: "db", + }, + }, + }) + + result := getEventResult[*structs.IndexedServiceList](t, eventCh) + require.Len(t, result.Services, 2) + + var names []string + for _, 
service := range result.Services { + names = append(names, service.Name) + } + require.ElementsMatch(t, names, []string{"web", "db"}) + }) + + testutil.RunStep(t, "deregister service", func(t *testing.T) { + publisher.Publish([]stream.Event{ + { + Index: index + 2, + Topic: pbsubscribe.Topic_ServiceList, + Payload: &state.EventPayloadServiceListUpdate{ + Op: pbsubscribe.CatalogOp_Deregister, + Name: "web", + }, + }, + }) + + result := getEventResult[*structs.IndexedServiceList](t, eventCh) + require.Len(t, result.Services, 1) + require.Equal(t, "db", result.Services[0].Name) + }) + }) +} + +func newMockServiceList(t *testing.T) *mockServiceList { + mock := &mockServiceList{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +type mockServiceList struct { + mock.Mock +} + +func (m *mockServiceList) Notify(ctx context.Context, req *structs.DCSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + return m.Called(ctx, req, correlationID, ch).Error(0) +} diff --git a/agent/proxycfg-glue/trust_bundle.go b/agent/proxycfg-glue/trust_bundle.go new file mode 100644 index 000000000..455d7dc9f --- /dev/null +++ b/agent/proxycfg-glue/trust_bundle.go @@ -0,0 +1,103 @@ +package proxycfgglue + +import ( + "context" + "errors" + + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/proto/pbpeering" +) + +// CacheTrustBundle satisfies the proxycfg.TrustBundle interface by sourcing +// data from the agent cache. +func CacheTrustBundle(c *cache.Cache) proxycfg.TrustBundle { + return &cacheProxyDataSource[*cachetype.TrustBundleReadRequest]{c, cachetype.TrustBundleReadName} +} + +// ServerTrustBundle satisfies the proxycfg.TrustBundle interface by sourcing +// data from a blocking query against the server's state store. +func ServerTrustBundle(deps ServerDataSourceDeps) proxycfg.TrustBundle { + return &serverTrustBundle{deps} +} + +type serverTrustBundle struct { + deps ServerDataSourceDeps +} + +func (s *serverTrustBundle) Notify(ctx context.Context, req *cachetype.TrustBundleReadRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + // TODO(peering): ACL check. + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *pbpeering.TrustBundleReadResponse, error) { + index, bundle, err := store.PeeringTrustBundleRead(ws, state.Query{ + Value: req.Request.Name, + EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(req.Request.Partition), + }) + if err != nil { + return 0, nil, err + } + return index, &pbpeering.TrustBundleReadResponse{ + Index: index, + Bundle: bundle, + }, nil + }, + dispatchBlockingQueryUpdate[*pbpeering.TrustBundleReadResponse](ch), + ) +} + +// CacheTrustBundleList satisfies the proxycfg.TrustBundleList interface by sourcing +// data from the agent cache. +func CacheTrustBundleList(c *cache.Cache) proxycfg.TrustBundleList { + return &cacheProxyDataSource[*cachetype.TrustBundleListRequest]{c, cachetype.TrustBundleListName} +} + +// ServerTrustBundleList satisfies the proxycfg.TrustBundle interface by +// sourcing data from a blocking query against the server's state store. 
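+//
+// Requests must either name a service (served via TrustBundleListByService)
+// or set Kind to mesh-gateway (which lists the trust bundles of all peers);
+// any other combination is rejected by Notify.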
+func ServerTrustBundleList(deps ServerDataSourceDeps) proxycfg.TrustBundleList { + return &serverTrustBundleList{deps} +} + +type serverTrustBundleList struct { + deps ServerDataSourceDeps +} + +func (s *serverTrustBundleList) Notify(ctx context.Context, req *cachetype.TrustBundleListRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + entMeta := acl.NewEnterpriseMetaWithPartition(req.Request.Partition, req.Request.Namespace) + + // TODO(peering): ACL check. + return watch.ServerLocalNotify(ctx, correlationID, s.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *pbpeering.TrustBundleListByServiceResponse, error) { + var ( + index uint64 + bundles []*pbpeering.PeeringTrustBundle + err error + ) + switch { + case req.Request.Kind == string(structs.ServiceKindMeshGateway): + index, bundles, err = store.PeeringTrustBundleList(ws, entMeta) + case req.Request.ServiceName != "": + index, bundles, err = store.TrustBundleListByService(ws, req.Request.ServiceName, s.deps.Datacenter, entMeta) + case req.Request.Kind != "": + err = errors.New("kind must be mesh-gateway if set") + default: + err = errors.New("one of service or kind is required") + } + if err != nil { + return 0, nil, err + } + + return index, &pbpeering.TrustBundleListByServiceResponse{ + Index: index, + Bundles: bundles, + }, nil + }, + dispatchBlockingQueryUpdate[*pbpeering.TrustBundleListByServiceResponse](ch), + ) +} diff --git a/agent/proxycfg-glue/trust_bundle_test.go b/agent/proxycfg-glue/trust_bundle_test.go new file mode 100644 index 000000000..910ffdcab --- /dev/null +++ b/agent/proxycfg-glue/trust_bundle_test.go @@ -0,0 +1,159 @@ +package proxycfgglue + +import ( + "context" + "testing" + + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/lib" + "github.com/hashicorp/consul/proto/pbpeering" + "github.com/hashicorp/consul/sdk/testutil" +) + +func TestServerTrustBundle(t *testing.T) { + const ( + index uint64 = 123 + peerName = "peer1" + ) + + store := state.NewStateStore(nil) + + require.NoError(t, store.PeeringTrustBundleWrite(index, &pbpeering.PeeringTrustBundle{ + PeerName: peerName, + TrustDomain: "before.com", + })) + + dataSource := ServerTrustBundle(ServerDataSourceDeps{ + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + err := dataSource.Notify(context.Background(), &cachetype.TrustBundleReadRequest{ + Request: &pbpeering.TrustBundleReadRequest{ + Name: peerName, + }, + }, "", eventCh) + require.NoError(t, err) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*pbpeering.TrustBundleReadResponse](t, eventCh) + require.Equal(t, "before.com", result.Bundle.TrustDomain) + }) + + testutil.RunStep(t, "update trust bundle", func(t *testing.T) { + require.NoError(t, store.PeeringTrustBundleWrite(index+1, &pbpeering.PeeringTrustBundle{ + PeerName: peerName, + TrustDomain: "after.com", + })) + + result := getEventResult[*pbpeering.TrustBundleReadResponse](t, eventCh) + require.Equal(t, "after.com", result.Bundle.TrustDomain) + }) +} + +func TestServerTrustBundleList(t *testing.T) { + const index uint64 = 123 + + t.Run("list by service", func(t *testing.T) { + const ( + serviceName = "web" + us = "default" + them = "peer2" + ) + + store := state.NewStateStore(nil) + require.NoError(t, 
store.CASetConfig(index, &structs.CAConfiguration{ClusterID: "cluster-id"})) + + testutil.RunStep(t, "export service to peer", func(t *testing.T) { + require.NoError(t, store.PeeringWrite(index, &pbpeering.Peering{ + ID: testUUID(t), + Name: them, + State: pbpeering.PeeringState_ACTIVE, + })) + + require.NoError(t, store.PeeringTrustBundleWrite(index, &pbpeering.PeeringTrustBundle{ + PeerName: them, + })) + + require.NoError(t, store.EnsureConfigEntry(index, &structs.ExportedServicesConfigEntry{ + Name: us, + Services: []structs.ExportedService{ + { + Name: serviceName, + Consumers: []structs.ServiceConsumer{ + {PeerName: them}, + }, + }, + }, + })) + }) + + dataSource := ServerTrustBundleList(ServerDataSourceDeps{ + Datacenter: "dc1", + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + err := dataSource.Notify(context.Background(), &cachetype.TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ + ServiceName: serviceName, + Partition: us, + }, + }, "", eventCh) + require.NoError(t, err) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*pbpeering.TrustBundleListByServiceResponse](t, eventCh) + require.Len(t, result.Bundles, 1) + }) + + testutil.RunStep(t, "unexport the service", func(t *testing.T) { + require.NoError(t, store.EnsureConfigEntry(index+1, &structs.ExportedServicesConfigEntry{ + Name: us, + Services: []structs.ExportedService{}, + })) + + result := getEventResult[*pbpeering.TrustBundleListByServiceResponse](t, eventCh) + require.Len(t, result.Bundles, 0) + }) + }) + + t.Run("list for mesh gateway", func(t *testing.T) { + store := state.NewStateStore(nil) + require.NoError(t, store.CASetConfig(index, &structs.CAConfiguration{ClusterID: "cluster-id"})) + + require.NoError(t, store.PeeringTrustBundleWrite(index, &pbpeering.PeeringTrustBundle{ + PeerName: "peer1", + })) + require.NoError(t, store.PeeringTrustBundleWrite(index, &pbpeering.PeeringTrustBundle{ + PeerName: "peer2", + })) + + dataSource := ServerTrustBundleList(ServerDataSourceDeps{ + GetStore: func() Store { return store }, + }) + + eventCh := make(chan proxycfg.UpdateEvent) + err := dataSource.Notify(context.Background(), &cachetype.TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ + Kind: string(structs.ServiceKindMeshGateway), + Partition: "default", + }, + }, "", eventCh) + require.NoError(t, err) + + result := getEventResult[*pbpeering.TrustBundleListByServiceResponse](t, eventCh) + require.Len(t, result.Bundles, 2) + }) +} + +func testUUID(t *testing.T) string { + v, err := lib.GenerateUUID(nil) + require.NoError(t, err) + return v +} diff --git a/agent/proxycfg-sources/catalog/config_source_test.go b/agent/proxycfg-sources/catalog/config_source_test.go index dffb0c2e5..4df59a7d3 100644 --- a/agent/proxycfg-sources/catalog/config_source_test.go +++ b/agent/proxycfg-sources/catalog/config_source_test.go @@ -116,7 +116,7 @@ func TestConfigSource_LocallyManagedService(t *testing.T) { token := "token" localState := testLocalState(t) - localState.AddService(&structs.NodeService{ID: serviceID.ID}, "") + localState.AddServiceWithChecks(&structs.NodeService{ID: serviceID.ID}, nil, "") localWatcher := NewMockWatcher(t) localWatcher.On("Watch", serviceID, nodeName, token). 
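The proxycfg-glue tests above all consume updates through testutil.RunStep and a generic getEventResult helper rather than hand-rolled select/timeout blocks. The helper's definition is not part of this hunk; the sketch below only reconstructs the shape implied by the call sites (a type parameter for the expected payload, a failure on event.Err, and the same 100ms timeout the replaced code used), so treat the details as illustrative rather than the exact implementation.

```go
package proxycfgglue

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/hashicorp/consul/agent/proxycfg"
)

// getEventResult reads a single update event from eventCh, or fails the test
// if none arrives within 100ms, and asserts that its payload has the expected
// type before returning it.
func getEventResult[ResultType any](t *testing.T, eventCh <-chan proxycfg.UpdateEvent) ResultType {
	t.Helper()

	select {
	case event := <-eventCh:
		require.NoError(t, event.Err, "event should not have an error")
		result, ok := event.Result.(ResultType)
		require.Truef(t, ok, "expected result of type %T, got: %T", result, event.Result)
		return result
	case <-time.After(100 * time.Millisecond):
		t.Fatal("timeout waiting for event")
	}

	panic("unreachable")
}
```

Centralising the timeout and the type assertion keeps each RunStep body down to the publish or registration call plus assertions on an already-typed result.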
diff --git a/agent/proxycfg-sources/local/sync_test.go b/agent/proxycfg-sources/local/sync_test.go index b73c0e3b3..62b4e8db7 100644 --- a/agent/proxycfg-sources/local/sync_test.go +++ b/agent/proxycfg-sources/local/sync_test.go @@ -29,10 +29,10 @@ func TestSync(t *testing.T) { state := local.NewState(local.Config{}, hclog.NewNullLogger(), tokens) state.TriggerSyncChanges = func() {} - state.AddService(&structs.NodeService{ + state.AddServiceWithChecks(&structs.NodeService{ ID: serviceID, Kind: structs.ServiceKindConnectProxy, - }, serviceToken) + }, nil, serviceToken) cfgMgr := NewMockConfigManager(t) @@ -96,10 +96,10 @@ func TestSync(t *testing.T) { Return([]proxycfg.ProxyID{}). Maybe() - state.AddService(&structs.NodeService{ + state.AddServiceWithChecks(&structs.NodeService{ ID: serviceID, Kind: structs.ServiceKindConnectProxy, - }, "") + }, nil, "") select { case reg := <-registerCh: diff --git a/agent/proxycfg/connect_proxy.go b/agent/proxycfg/connect_proxy.go index 3221150db..15e3498f2 100644 --- a/agent/proxycfg/connect_proxy.go +++ b/agent/proxycfg/connect_proxy.go @@ -28,10 +28,12 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e snap.ConnectProxy.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes) snap.ConnectProxy.WatchedServiceChecks = make(map[structs.ServiceID][]structs.CheckType) snap.ConnectProxy.PreparedQueryEndpoints = make(map[UpstreamID]structs.CheckServiceNodes) + snap.ConnectProxy.DestinationsUpstream = watch.NewMap[UpstreamID, *structs.ServiceConfigEntry]() snap.ConnectProxy.UpstreamConfig = make(map[UpstreamID]*structs.Upstream) snap.ConnectProxy.PassthroughUpstreams = make(map[UpstreamID]map[string]map[string]struct{}) snap.ConnectProxy.PassthroughIndices = make(map[string]indexedTarget) snap.ConnectProxy.PeerUpstreamEndpoints = watch.NewMap[UpstreamID, structs.CheckServiceNodes]() + snap.ConnectProxy.DestinationGateways = watch.NewMap[UpstreamID, structs.CheckServiceNodes]() snap.ConnectProxy.PeerUpstreamEndpointsUseHostnames = make(map[UpstreamID]struct{}) // Watch for root changes @@ -44,11 +46,13 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e return snap, err } - err = s.dataSources.TrustBundleList.Notify(ctx, &pbpeering.TrustBundleListByServiceRequest{ - // TODO(peering): Pass ACL token - ServiceName: s.proxyCfg.DestinationServiceName, - Namespace: s.proxyID.NamespaceOrDefault(), - Partition: s.proxyID.PartitionOrDefault(), + err = s.dataSources.TrustBundleList.Notify(ctx, &cachetype.TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ + ServiceName: s.proxyCfg.DestinationServiceName, + Namespace: s.proxyID.NamespaceOrDefault(), + Partition: s.proxyID.PartitionOrDefault(), + }, + QueryOptions: structs.QueryOptions{Token: s.token}, }, peeringTrustBundlesWatchID, s.ch) if err != nil { return snap, err @@ -116,6 +120,16 @@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e if err != nil { return snap, err } + // We also infer upstreams from destinations (egress points) + err = s.dataSources.IntentionUpstreamsDestination.Notify(ctx, &structs.ServiceSpecificRequest{ + Datacenter: s.source.Datacenter, + QueryOptions: structs.QueryOptions{Token: s.token}, + ServiceName: s.proxyCfg.DestinationServiceName, + EnterpriseMeta: s.proxyID.EnterpriseMeta, + }, intentionUpstreamsDestinationID, s.ch) + if err != nil { + return snap, err + } } // Watch for updates to service endpoints for all upstreams @@ -212,11 +226,14 
@@ func (s *handlerConnectProxy) initialize(ctx context.Context) (ConfigSnapshot, e } // Check whether a watch for this peer exists to avoid duplicates. - if _, ok := snap.ConnectProxy.UpstreamPeerTrustBundles.Get(uid.Peer); !ok { + if ok := snap.ConnectProxy.UpstreamPeerTrustBundles.IsWatched(uid.Peer); !ok { peerCtx, cancel := context.WithCancel(ctx) - if err := s.dataSources.TrustBundle.Notify(peerCtx, &pbpeering.TrustBundleReadRequest{ - Name: uid.Peer, - Partition: uid.PartitionOrDefault(), + if err := s.dataSources.TrustBundle.Notify(peerCtx, &cachetype.TrustBundleReadRequest{ + Request: &pbpeering.TrustBundleReadRequest{ + Name: uid.Peer, + Partition: uid.PartitionOrDefault(), + }, + QueryOptions: structs.QueryOptions{Token: s.token}, }, peerTrustBundleIDPrefix+uid.Peer, s.ch); err != nil { cancel() return snap, fmt.Errorf("error while watching trust bundle for peer %q: %w", uid.Peer, err) @@ -330,11 +347,14 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s snap.ConnectProxy.PeerUpstreamEndpoints.InitWatch(uid, hcancel) // Check whether a watch for this peer exists to avoid duplicates. - if _, ok := snap.ConnectProxy.UpstreamPeerTrustBundles.Get(uid.Peer); !ok { + if ok := snap.ConnectProxy.UpstreamPeerTrustBundles.IsWatched(uid.Peer); !ok { peerCtx, cancel := context.WithCancel(ctx) - if err := s.dataSources.TrustBundle.Notify(peerCtx, &pbpeering.TrustBundleReadRequest{ - Name: uid.Peer, - Partition: uid.PartitionOrDefault(), + if err := s.dataSources.TrustBundle.Notify(peerCtx, &cachetype.TrustBundleReadRequest{ + Request: &pbpeering.TrustBundleReadRequest{ + Name: uid.Peer, + Partition: uid.PartitionOrDefault(), + }, + QueryOptions: structs.QueryOptions{Token: s.token}, }, peerTrustBundleIDPrefix+uid.Peer, s.ch); err != nil { cancel() return fmt.Errorf("error while watching trust bundle for peer %q: %w", uid.Peer, err) @@ -508,7 +528,83 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s delete(snap.ConnectProxy.DiscoveryChain, uid) } } + case u.CorrelationID == intentionUpstreamsDestinationID: + resp, ok := u.Result.(*structs.IndexedServiceList) + if !ok { + return fmt.Errorf("invalid type for response %T", u.Result) + } + seenUpstreams := make(map[UpstreamID]struct{}) + for _, svc := range resp.Services { + uid := NewUpstreamIDFromServiceName(svc) + seenUpstreams[uid] = struct{}{} + { + childCtx, cancel := context.WithCancel(ctx) + err := s.dataSources.ConfigEntry.Notify(childCtx, &structs.ConfigEntryQuery{ + Kind: structs.ServiceDefaults, + Name: svc.Name, + Datacenter: s.source.Datacenter, + QueryOptions: structs.QueryOptions{Token: s.token}, + EnterpriseMeta: svc.EnterpriseMeta, + }, DestinationConfigEntryID+svc.String(), s.ch) + if err != nil { + cancel() + return err + } + snap.ConnectProxy.DestinationsUpstream.InitWatch(uid, cancel) + } + { + childCtx, cancel := context.WithCancel(ctx) + err := s.dataSources.ServiceGateways.Notify(childCtx, &structs.ServiceSpecificRequest{ + ServiceName: svc.Name, + Datacenter: s.source.Datacenter, + QueryOptions: structs.QueryOptions{Token: s.token}, + EnterpriseMeta: svc.EnterpriseMeta, + ServiceKind: structs.ServiceKindTerminatingGateway, + }, DestinationGatewayID+svc.String(), s.ch) + if err != nil { + cancel() + return err + } + snap.ConnectProxy.DestinationGateways.InitWatch(uid, cancel) + } + } + snap.ConnectProxy.DestinationsUpstream.ForEachKey(func(uid UpstreamID) bool { + if _, ok := seenUpstreams[uid]; !ok { + 
snap.ConnectProxy.DestinationsUpstream.CancelWatch(uid) + } + return true + }) + + snap.ConnectProxy.DestinationGateways.ForEachKey(func(uid UpstreamID) bool { + if _, ok := seenUpstreams[uid]; !ok { + snap.ConnectProxy.DestinationGateways.CancelWatch(uid) + } + return true + }) + case strings.HasPrefix(u.CorrelationID, DestinationConfigEntryID): + resp, ok := u.Result.(*structs.ConfigEntryResponse) + if !ok { + return fmt.Errorf("invalid type for response: %T", u.Result) + } + + pq := strings.TrimPrefix(u.CorrelationID, DestinationConfigEntryID) + uid := UpstreamIDFromString(pq) + serviceConf, ok := resp.Entry.(*structs.ServiceConfigEntry) + if !ok { + return fmt.Errorf("invalid type for service default: %T", resp.Entry.GetName()) + } + + snap.ConnectProxy.DestinationsUpstream.Set(uid, serviceConf) + case strings.HasPrefix(u.CorrelationID, DestinationGatewayID): + resp, ok := u.Result.(*structs.IndexedCheckServiceNodes) + if !ok { + return fmt.Errorf("invalid type for response: %T", u.Result) + } + + pq := strings.TrimPrefix(u.CorrelationID, DestinationGatewayID) + uid := UpstreamIDFromString(pq) + snap.ConnectProxy.DestinationGateways.Set(uid, resp.Nodes) case strings.HasPrefix(u.CorrelationID, "upstream:"+preparedQueryIDPrefix): resp, ok := u.Result.(*structs.PreparedQueryExecuteResponse) if !ok { diff --git a/agent/proxycfg/data_sources.go b/agent/proxycfg/data_sources.go index 310a4340e..bda0226ff 100644 --- a/agent/proxycfg/data_sources.go +++ b/agent/proxycfg/data_sources.go @@ -5,7 +5,6 @@ import ( cachetype "github.com/hashicorp/consul/agent/cache-types" "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/proto/pbpeering" ) // UpdateEvent contains new data for a resource we are subscribed to (e.g. an @@ -47,6 +46,10 @@ type DataSources struct { // notification channel. GatewayServices GatewayServices + // ServiceGateways provides updates about a gateway's upstream services on a + // notification channel. + ServiceGateways ServiceGateways + // Health provides service health updates on a notification channel. Health Health @@ -61,6 +64,10 @@ type DataSources struct { // notification channel. IntentionUpstreams IntentionUpstreams + // IntentionUpstreamsDestination provides intention-inferred upstream updates on a + // notification channel. + IntentionUpstreamsDestination IntentionUpstreamsDestination + // InternalServiceDump provides updates about a (gateway) service on a // notification channel. InternalServiceDump InternalServiceDump @@ -115,7 +122,7 @@ type ConfigEntry interface { Notify(ctx context.Context, req *structs.ConfigEntryQuery, correlationID string, ch chan<- UpdateEvent) error } -// ConfigEntry is the interface used to consume updates about a list of config +// ConfigEntryList is the interface used to consume updates about a list of config // entries. type ConfigEntryList interface { Notify(ctx context.Context, req *structs.ConfigEntryQuery, correlationID string, ch chan<- UpdateEvent) error @@ -139,6 +146,11 @@ type GatewayServices interface { Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- UpdateEvent) error } +// ServiceGateways is the interface used to consume updates about a service terminating gateways +type ServiceGateways interface { + Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- UpdateEvent) error +} + // Health is the interface used to consume service health updates. 
type Health interface { Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- UpdateEvent) error @@ -162,6 +174,12 @@ type IntentionUpstreams interface { Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- UpdateEvent) error } +// IntentionUpstreamsDestination is the interface used to consume updates about upstreams destination +// inferred from service intentions. +type IntentionUpstreamsDestination interface { + Notify(ctx context.Context, req *structs.ServiceSpecificRequest, correlationID string, ch chan<- UpdateEvent) error +} + // InternalServiceDump is the interface used to consume updates about a (gateway) // service via the internal ServiceDump RPC. type InternalServiceDump interface { @@ -201,13 +219,13 @@ type ServiceList interface { // TrustBundle is the interface used to consume updates about a single // peer's trust bundle. type TrustBundle interface { - Notify(ctx context.Context, req *pbpeering.TrustBundleReadRequest, correlationID string, ch chan<- UpdateEvent) error + Notify(ctx context.Context, req *cachetype.TrustBundleReadRequest, correlationID string, ch chan<- UpdateEvent) error } // TrustBundleList is the interface used to consume updates about trust bundles // for peered clusters that the given proxy is exported to. type TrustBundleList interface { - Notify(ctx context.Context, req *pbpeering.TrustBundleListByServiceRequest, correlationID string, ch chan<- UpdateEvent) error + Notify(ctx context.Context, req *cachetype.TrustBundleListRequest, correlationID string, ch chan<- UpdateEvent) error } // ExportedPeeredServices is the interface used to consume updates about the diff --git a/agent/proxycfg/ingress_gateway.go b/agent/proxycfg/ingress_gateway.go index 3fb67ddab..828229864 100644 --- a/agent/proxycfg/ingress_gateway.go +++ b/agent/proxycfg/ingress_gateway.go @@ -148,6 +148,12 @@ func (s *handlerIngressGateway) handleUpdate(ctx context.Context, u UpdateEvent, for uid, cancelFn := range snap.IngressGateway.WatchedDiscoveryChains { if _, ok := watchedSvcs[uid]; !ok { + for targetID, cancelUpstreamFn := range snap.IngressGateway.WatchedUpstreams[uid] { + delete(snap.IngressGateway.WatchedUpstreams[uid], targetID) + delete(snap.IngressGateway.WatchedUpstreamEndpoints[uid], targetID) + cancelUpstreamFn() + } + cancelFn() delete(snap.IngressGateway.WatchedDiscoveryChains, uid) } diff --git a/agent/proxycfg/internal/watch/watchmap.go b/agent/proxycfg/internal/watch/watchmap.go index bbf42dc9a..ec676bb8f 100644 --- a/agent/proxycfg/internal/watch/watchmap.go +++ b/agent/proxycfg/internal/watch/watchmap.go @@ -106,3 +106,18 @@ func (m Map[K, V]) ForEachKey(f func(K) bool) { } } } + +// ForEachKeyE iterates through the map, calling f +// for each iteration. It is up to the caller to +// Get the value and nil-check if required. +// If a non-nil error is returned by f, iterating +// stops and the error is returned. +// Order of iteration is non-deterministic. 
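+//
+// For example, to fail fast when any tracked key has no value set:
+//
+//	err := m.ForEachKeyE(func(k K) error {
+//		if _, ok := m.Get(k); !ok {
+//			return fmt.Errorf("no value set for key %v", k)
+//		}
+//		return nil
+//	})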
+func (m Map[K, V]) ForEachKeyE(f func(K) error) error { + for k := range m.M { + if err := f(k); err != nil { + return err + } + } + return nil +} diff --git a/agent/proxycfg/internal/watch/watchmap_test.go b/agent/proxycfg/internal/watch/watchmap_test.go index 590351853..deb7cea08 100644 --- a/agent/proxycfg/internal/watch/watchmap_test.go +++ b/agent/proxycfg/internal/watch/watchmap_test.go @@ -1,6 +1,7 @@ package watch import ( + "errors" "testing" "github.com/stretchr/testify/require" @@ -111,3 +112,43 @@ func TestMap_ForEach(t *testing.T) { require.Equal(t, 1, count) } } + +func TestMap_ForEachE(t *testing.T) { + type testType struct { + s string + } + + m := NewMap[string, any]() + inputs := map[string]any{ + "hello": 13, + "foo": struct{}{}, + "bar": &testType{s: "wow"}, + } + for k, v := range inputs { + m.InitWatch(k, nil) + m.Set(k, v) + } + require.Equal(t, 3, m.Len()) + + // returning nil error continues iteration + { + var count int + err := m.ForEachKeyE(func(k string) error { + count++ + return nil + }) + require.Equal(t, 3, count) + require.Nil(t, err) + } + + // returning an error should exit immediately + { + var count int + err := m.ForEachKeyE(func(k string) error { + count++ + return errors.New("boooo") + }) + require.Equal(t, 1, count) + require.Errorf(t, err, "boo") + } +} diff --git a/agent/proxycfg/manager_test.go b/agent/proxycfg/manager_test.go index 184b62148..2a3cdd15f 100644 --- a/agent/proxycfg/manager_test.go +++ b/agent/proxycfg/manager_test.go @@ -236,6 +236,8 @@ func TestManager_BasicLifecycle(t *testing.T) { PeerUpstreamEndpointsUseHostnames: map[UpstreamID]struct{}{}, }, PreparedQueryEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + DestinationsUpstream: watch.NewMap[UpstreamID, *structs.ServiceConfigEntry](), + DestinationGateways: watch.NewMap[UpstreamID, structs.CheckServiceNodes](), WatchedServiceChecks: map[structs.ServiceID][]structs.CheckType{}, Intentions: TestIntentions(), IntentionsSet: true, @@ -297,6 +299,8 @@ func TestManager_BasicLifecycle(t *testing.T) { PeerUpstreamEndpointsUseHostnames: map[UpstreamID]struct{}{}, }, PreparedQueryEndpoints: map[UpstreamID]structs.CheckServiceNodes{}, + DestinationsUpstream: watch.NewMap[UpstreamID, *structs.ServiceConfigEntry](), + DestinationGateways: watch.NewMap[UpstreamID, structs.CheckServiceNodes](), WatchedServiceChecks: map[structs.ServiceID][]structs.CheckType{}, Intentions: TestIntentions(), IntentionsSet: true, diff --git a/agent/proxycfg/mesh_gateway.go b/agent/proxycfg/mesh_gateway.go index b5c3f6755..f80ee537f 100644 --- a/agent/proxycfg/mesh_gateway.go +++ b/agent/proxycfg/mesh_gateway.go @@ -32,11 +32,14 @@ func (s *handlerMeshGateway) initialize(ctx context.Context) (ConfigSnapshot, er } // Watch for all peer trust bundles we may need. 
- err = s.dataSources.TrustBundleList.Notify(ctx, &pbpeering.TrustBundleListByServiceRequest{ - // TODO(peering): Pass ACL token - Kind: string(structs.ServiceKindMeshGateway), - Namespace: s.proxyID.NamespaceOrDefault(), - Partition: s.proxyID.PartitionOrDefault(), + err = s.dataSources.TrustBundleList.Notify(ctx, &cachetype.TrustBundleListRequest{ + Request: &pbpeering.TrustBundleListByServiceRequest{ + Kind: string(structs.ServiceKindMeshGateway), + ServiceName: s.service, + Namespace: s.proxyID.NamespaceOrDefault(), + Partition: s.proxyID.PartitionOrDefault(), + }, + QueryOptions: structs.QueryOptions{Token: s.token}, }, peeringTrustBundlesWatchID, s.ch) if err != nil { return snap, err diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 6a02aad1e..8d2d81bed 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -142,6 +142,9 @@ type configSnapshotConnectProxy struct { // intentions. Intentions structs.Intentions IntentionsSet bool + + DestinationsUpstream watch.Map[UpstreamID, *structs.ServiceConfigEntry] + DestinationGateways watch.Map[UpstreamID, structs.CheckServiceNodes] } // isEmpty is a test helper @@ -163,6 +166,8 @@ func (c *configSnapshotConnectProxy) isEmpty() bool { len(c.UpstreamConfig) == 0 && len(c.PassthroughUpstreams) == 0 && len(c.IntentionUpstreams) == 0 && + c.DestinationGateways.Len() == 0 && + c.DestinationsUpstream.Len() == 0 && len(c.PeeredUpstreams) == 0 && !c.InboundPeerTrustBundlesSet && !c.MeshConfigSet && @@ -302,7 +307,7 @@ func (c *configSnapshotTerminatingGateway) ValidDestinations() []structs.Service // Skip the service if we haven't gotten our service config yet to know // the protocol. - if _, ok := c.ServiceConfigs[svc]; !ok || c.ServiceConfigs[svc].Destination.Address == "" { + if conf, ok := c.ServiceConfigs[svc]; !ok || len(conf.Destination.Addresses) == 0 { continue } @@ -833,19 +838,23 @@ func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) structs.Peeri return *csn.Service.Connect.PeerMeta } +// PeeredUpstreamIDs returns a slice of peered UpstreamIDs from explicit config entries +// and implicit imported services. +// Upstreams whose trust bundles have not been stored in the snapshot are ignored. func (u *ConfigSnapshotUpstreams) PeeredUpstreamIDs() []UpstreamID { - out := make([]UpstreamID, 0, len(u.UpstreamConfig)) - for uid := range u.UpstreamConfig { - if uid.Peer == "" { - continue + out := make([]UpstreamID, 0, u.PeerUpstreamEndpoints.Len()) + u.PeerUpstreamEndpoints.ForEachKey(func(uid UpstreamID) bool { + if _, ok := u.PeerUpstreamEndpoints.Get(uid); !ok { + // uid might exist in the map but if Set hasn't been called, skip for now. + return true } if _, ok := u.UpstreamPeerTrustBundles.Get(uid.Peer); !ok { // The trust bundle for this upstream is not available yet, skip for now. 
-			continue
+			return true
 		}
 
 		out = append(out, uid)
-	}
+		return true
+	})
 	return out
 }
diff --git a/agent/proxycfg/state.go b/agent/proxycfg/state.go
index f9388cf48..13b22c4fd 100644
--- a/agent/proxycfg/state.go
+++ b/agent/proxycfg/state.go
@@ -37,9 +37,12 @@ const (
 	serviceIntentionsIDPrefix        = "service-intentions:"
 	intentionUpstreamsID             = "intention-upstreams"
 	peeredUpstreamsID                = "peered-upstreams"
+	intentionUpstreamsDestinationID  = "intention-upstreams-destination"
 	upstreamPeerWatchIDPrefix        = "upstream-peer:"
 	exportedServiceListWatchID       = "exported-service-list"
 	meshConfigEntryID                = "mesh"
+	DestinationConfigEntryID         = "destination:"
+	DestinationGatewayID             = "dest-gateway:"
 	svcChecksWatchIDPrefix           = cachetype.ServiceHTTPChecksName + ":"
 	preparedQueryIDPrefix            = string(structs.UpstreamDestTypePreparedQuery) + ":"
 	defaultPreparedQueryPollInterval = 30 * time.Second
diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go
index 36b641a69..855ded03d 100644
--- a/agent/proxycfg/state_test.go
+++ b/agent/proxycfg/state_test.go
@@ -125,18 +125,20 @@ func recordWatches(sc *stateConfig) *watchRecorder {
 		Datacenters:                     typedWatchRecorder[*structs.DatacentersRequest]{wr},
 		FederationStateListMeshGateways: typedWatchRecorder[*structs.DCSpecificRequest]{wr},
 		GatewayServices:                 typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
+		ServiceGateways:                 typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
 		Health:                          typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
 		HTTPChecks:                      typedWatchRecorder[*cachetype.ServiceHTTPChecksRequest]{wr},
 		Intentions:                      typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
 		IntentionUpstreams:              typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
+		IntentionUpstreamsDestination:   typedWatchRecorder[*structs.ServiceSpecificRequest]{wr},
 		InternalServiceDump:             typedWatchRecorder[*structs.ServiceDumpRequest]{wr},
 		LeafCertificate:                 typedWatchRecorder[*cachetype.ConnectCALeafRequest]{wr},
 		PeeredUpstreams:                 typedWatchRecorder[*structs.PartitionSpecificRequest]{wr},
 		PreparedQuery:                   typedWatchRecorder[*structs.PreparedQueryExecuteRequest]{wr},
 		ResolvedServiceConfig:           typedWatchRecorder[*structs.ServiceConfigRequest]{wr},
 		ServiceList:                     typedWatchRecorder[*structs.DCSpecificRequest]{wr},
-		TrustBundle:                     typedWatchRecorder[*pbpeering.TrustBundleReadRequest]{wr},
-		TrustBundleList:                 typedWatchRecorder[*pbpeering.TrustBundleListByServiceRequest]{wr},
+		TrustBundle:                     typedWatchRecorder[*cachetype.TrustBundleReadRequest]{wr},
+		TrustBundleList:                 typedWatchRecorder[*cachetype.TrustBundleListRequest]{wr},
 		ExportedPeeredServices:          typedWatchRecorder[*structs.DCSpecificRequest]{wr},
 	}
 	recordWatchesEnterprise(sc, wr)
@@ -201,9 +203,9 @@ func verifyDatacentersWatch(t testing.TB, request any) {
 
 func genVerifyTrustBundleReadWatch(peer string) verifyWatchRequest {
 	return func(t testing.TB, request any) {
-		reqReal, ok := request.(*pbpeering.TrustBundleReadRequest)
+		reqReal, ok := request.(*cachetype.TrustBundleReadRequest)
 		require.True(t, ok)
-		require.Equal(t, peer, reqReal.Name)
+		require.Equal(t, peer, reqReal.Request.Name)
 	}
 }
 
@@ -223,19 +225,19 @@ func genVerifyLeafWatch(expectedService string, expectedDatacenter string) verif
 
 func genVerifyTrustBundleListWatch(service string) verifyWatchRequest {
 	return func(t testing.TB, request any) {
-		reqReal, ok := request.(*pbpeering.TrustBundleListByServiceRequest)
+		reqReal, ok := request.(*cachetype.TrustBundleListRequest)
 		require.True(t, ok)
-		require.Equal(t, service, reqReal.ServiceName)
+		require.Equal(t, service,
reqReal.Request.ServiceName) } } func genVerifyTrustBundleListWatchForMeshGateway(partition string) verifyWatchRequest { return func(t testing.TB, request any) { - reqReal, ok := request.(*pbpeering.TrustBundleListByServiceRequest) + reqReal, ok := request.(*cachetype.TrustBundleListRequest) require.True(t, ok) - require.Equal(t, string(structs.ServiceKindMeshGateway), reqReal.Kind) - require.True(t, acl.EqualPartitions(partition, reqReal.Partition), "%q != %q", partition, reqReal.Partition) - require.Empty(t, reqReal.ServiceName) + require.Equal(t, string(structs.ServiceKindMeshGateway), reqReal.Request.Kind) + require.True(t, acl.EqualPartitions(partition, reqReal.Request.Partition), "%q != %q", partition, reqReal.Request.Partition) + require.NotEmpty(t, reqReal.Request.ServiceName) } } @@ -1738,11 +1740,12 @@ func TestState_WatchesAndUpdates(t *testing.T) { stages: []verificationStage{ { requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), - meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), - rootsWatchID: genVerifyDCSpecificWatch("dc1"), - leafWatchID: genVerifyLeafWatch("api", "dc1"), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), }, verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { require.False(t, snap.Valid(), "proxy without roots/leaf/intentions is not valid") @@ -1823,11 +1826,12 @@ func TestState_WatchesAndUpdates(t *testing.T) { // Empty on initialization { requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), - meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), - rootsWatchID: genVerifyDCSpecificWatch("dc1"), - leafWatchID: genVerifyLeafWatch("api", "dc1"), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), }, verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { require.False(t, snap.Valid(), "proxy without roots/leaf/intentions is not valid") @@ -1882,10 +1886,11 @@ func TestState_WatchesAndUpdates(t *testing.T) { // Receiving an intention should lead to spinning up a discovery chain watch { requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), - rootsWatchID: genVerifyDCSpecificWatch("dc1"), - leafWatchID: genVerifyLeafWatch("api", "dc1"), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), }, events: []UpdateEvent{ { 
@@ -2313,10 +2318,11 @@ func TestState_WatchesAndUpdates(t *testing.T) { { // Empty list of upstreams should clean up map keys requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), - rootsWatchID: genVerifyDCSpecificWatch("dc1"), - leafWatchID: genVerifyLeafWatch("api", "dc1"), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), }, events: []UpdateEvent{ { @@ -2344,6 +2350,169 @@ func TestState_WatchesAndUpdates(t *testing.T) { }, }, }, + "transparent-proxy-handle-update-destination": { + ns: structs.NodeService{ + Kind: structs.ServiceKindConnectProxy, + ID: "api-proxy", + Service: "api-proxy", + Address: "10.0.1.1", + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: "api", + Mode: structs.ProxyModeTransparent, + Upstreams: structs.Upstreams{ + { + CentrallyConfigured: true, + DestinationName: structs.WildcardSpecifier, + DestinationNamespace: structs.WildcardSpecifier, + Config: map[string]interface{}{ + "connect_timeout_ms": 6000, + }, + MeshGateway: structs.MeshGatewayConfig{Mode: structs.MeshGatewayModeRemote}, + }, + }, + }, + }, + sourceDC: "dc1", + stages: []verificationStage{ + // Empty on initialization + { + requiredWatches: map[string]verifyWatchRequest{ + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.False(t, snap.Valid(), "proxy without roots/leaf/intentions is not valid") + require.True(t, snap.MeshGateway.isEmpty()) + require.True(t, snap.IngressGateway.isEmpty()) + require.True(t, snap.TerminatingGateway.isEmpty()) + + // Centrally configured upstream defaults should be stored so that upstreams from intentions can inherit them + require.Len(t, snap.ConnectProxy.UpstreamConfig, 1) + + wc := structs.NewServiceName(structs.WildcardSpecifier, structs.WildcardEnterpriseMetaInDefaultPartition()) + wcUID := NewUpstreamIDFromServiceName(wc) + require.Contains(t, snap.ConnectProxy.UpstreamConfig, wcUID) + }, + }, + // Valid snapshot after roots, leaf, and intentions + { + events: []UpdateEvent{ + rootWatchEvent(), + { + CorrelationID: leafWatchID, + Result: issuedCert, + Err: nil, + }, + { + CorrelationID: intentionsWatchID, + Result: TestIntentions(), + Err: nil, + }, + { + CorrelationID: meshConfigEntryID, + Result: &structs.ConfigEntryResponse{ + Entry: &structs.MeshConfigEntry{ + TransparentProxy: structs.TransparentProxyMeshConfig{}, + }, + }, + Err: nil, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid(), "proxy with roots/leaf/intentions is valid") + require.Equal(t, indexedRoots, snap.Roots) + require.Equal(t, issuedCert, snap.Leaf()) + require.Equal(t, TestIntentions(), snap.ConnectProxy.Intentions) + require.True(t, snap.MeshGateway.isEmpty()) + require.True(t, snap.IngressGateway.isEmpty()) + 
require.True(t, snap.TerminatingGateway.isEmpty()) + require.True(t, snap.ConnectProxy.MeshConfigSet) + require.NotNil(t, snap.ConnectProxy.MeshConfig) + }, + }, + // Receiving an intention should lead to spinning up a DestinationConfigEntryID + { + requiredWatches: map[string]verifyWatchRequest{ + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("api", "dc1"), + }, + events: []UpdateEvent{ + { + CorrelationID: intentionUpstreamsDestinationID, + Result: &structs.IndexedServiceList{ + Services: structs.ServiceList{ + db, + }, + }, + Err: nil, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid(), "should still be valid") + + // Watches have a key allocated even if the value is not set + require.Equal(t, 1, snap.ConnectProxy.DestinationsUpstream.Len()) + }, + }, + // DestinationConfigEntryID updates should be stored + { + requiredWatches: map[string]verifyWatchRequest{ + DestinationConfigEntryID + dbUID.String(): genVerifyConfigEntryWatch(structs.ServiceDefaults, db.Name, "dc1"), + }, + events: []UpdateEvent{ + { + CorrelationID: DestinationConfigEntryID + dbUID.String(), + Result: &structs.ConfigEntryResponse{ + Entry: &structs.ServiceConfigEntry{Name: "db", Destination: &structs.DestinationConfig{}}, + }, + Err: nil, + }, + { + CorrelationID: DestinationGatewayID + dbUID.String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "foo", + Partition: api.PartitionOrDefault(), + Datacenter: "dc1", + }, + Service: &structs.NodeService{ + Service: "gtwy1", + TaggedAddresses: map[string]structs.ServiceAddress{ + structs.ServiceGatewayVirtualIPTag(structs.ServiceName{Name: "db", EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition()}): {Address: "172.0.0.1", Port: 443}, + }, + }, + Checks: structs.HealthChecks{}, + }, + }, + }, + Err: nil, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid(), "should still be valid") + require.Equal(t, 1, snap.ConnectProxy.DestinationsUpstream.Len()) + require.Equal(t, 1, snap.ConnectProxy.DestinationGateways.Len()) + snap.ConnectProxy.DestinationsUpstream.ForEachKey(func(uid UpstreamID) bool { + _, ok := snap.ConnectProxy.DestinationsUpstream.Get(uid) + require.True(t, ok) + return true + }) + dbDest, ok := snap.ConnectProxy.DestinationsUpstream.Get(dbUID) + require.True(t, ok) + require.Equal(t, structs.ServiceConfigEntry{Name: "db", Destination: &structs.DestinationConfig{}}, *dbDest) + }, + }, + }, + }, // Receiving an empty upstreams from Intentions list shouldn't delete explicit upstream watches "transparent-proxy-handle-update-explicit-cross-dc": { ns: structs.NodeService{ @@ -2379,9 +2548,10 @@ func TestState_WatchesAndUpdates(t *testing.T) { // Empty on initialization { requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), - meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: 
genVerifyServiceSpecificRequest("api", "", "dc1", false), + meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), "discovery-chain:" + upstreamIDForDC2(dbUID).String(): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ Name: "db", EvaluateInDatacenter: "dc2", @@ -2479,8 +2649,9 @@ func TestState_WatchesAndUpdates(t *testing.T) { // be deleted from the snapshot. { requiredWatches: map[string]verifyWatchRequest{ - intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), - intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionsWatchID: genVerifyIntentionWatch("api", "dc1"), + intentionUpstreamsID: genVerifyServiceSpecificRequest("api", "", "dc1", false), + intentionUpstreamsDestinationID: genVerifyServiceSpecificRequest("api", "", "dc1", false), "discovery-chain:" + upstreamIDForDC2(dbUID).String(): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ Name: "db", EvaluateInDatacenter: "dc2", diff --git a/agent/proxycfg/testing.go b/agent/proxycfg/testing.go index 744c17e18..0493e30da 100644 --- a/agent/proxycfg/testing.go +++ b/agent/proxycfg/testing.go @@ -739,18 +739,20 @@ func testConfigSnapshotFixture( Datacenters: &noopDataSource[*structs.DatacentersRequest]{}, FederationStateListMeshGateways: &noopDataSource[*structs.DCSpecificRequest]{}, GatewayServices: &noopDataSource[*structs.ServiceSpecificRequest]{}, + ServiceGateways: &noopDataSource[*structs.ServiceSpecificRequest]{}, Health: &noopDataSource[*structs.ServiceSpecificRequest]{}, HTTPChecks: &noopDataSource[*cachetype.ServiceHTTPChecksRequest]{}, Intentions: &noopDataSource[*structs.ServiceSpecificRequest]{}, IntentionUpstreams: &noopDataSource[*structs.ServiceSpecificRequest]{}, + IntentionUpstreamsDestination: &noopDataSource[*structs.ServiceSpecificRequest]{}, InternalServiceDump: &noopDataSource[*structs.ServiceDumpRequest]{}, LeafCertificate: &noopDataSource[*cachetype.ConnectCALeafRequest]{}, PeeredUpstreams: &noopDataSource[*structs.PartitionSpecificRequest]{}, PreparedQuery: &noopDataSource[*structs.PreparedQueryExecuteRequest]{}, ResolvedServiceConfig: &noopDataSource[*structs.ServiceConfigRequest]{}, ServiceList: &noopDataSource[*structs.DCSpecificRequest]{}, - TrustBundle: &noopDataSource[*pbpeering.TrustBundleReadRequest]{}, - TrustBundleList: &noopDataSource[*pbpeering.TrustBundleListByServiceRequest]{}, + TrustBundle: &noopDataSource[*cachetype.TrustBundleReadRequest]{}, + TrustBundleList: &noopDataSource[*cachetype.TrustBundleListRequest]{}, ExportedPeeredServices: &noopDataSource[*structs.DCSpecificRequest]{}, }, dnsConfig: DNSConfig{ // TODO: make configurable @@ -946,13 +948,14 @@ func NewTestDataSources() *TestDataSources { HTTPChecks: NewTestDataSource[*cachetype.ServiceHTTPChecksRequest, []structs.CheckType](), Intentions: NewTestDataSource[*structs.ServiceSpecificRequest, structs.Intentions](), IntentionUpstreams: NewTestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedServiceList](), + IntentionUpstreamsDestination: NewTestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedServiceList](), InternalServiceDump: NewTestDataSource[*structs.ServiceDumpRequest, *structs.IndexedNodesWithGateways](), LeafCertificate: NewTestDataSource[*cachetype.ConnectCALeafRequest, *structs.IssuedCert](), PreparedQuery: NewTestDataSource[*structs.PreparedQueryExecuteRequest, *structs.PreparedQueryExecuteResponse](), ResolvedServiceConfig: NewTestDataSource[*structs.ServiceConfigRequest, *structs.ServiceConfigResponse](), ServiceList: 
NewTestDataSource[*structs.DCSpecificRequest, *structs.IndexedServiceList](), - TrustBundle: NewTestDataSource[*pbpeering.TrustBundleReadRequest, *pbpeering.TrustBundleReadResponse](), - TrustBundleList: NewTestDataSource[*pbpeering.TrustBundleListByServiceRequest, *pbpeering.TrustBundleListByServiceResponse](), + TrustBundle: NewTestDataSource[*cachetype.TrustBundleReadRequest, *pbpeering.TrustBundleReadResponse](), + TrustBundleList: NewTestDataSource[*cachetype.TrustBundleListRequest, *pbpeering.TrustBundleListByServiceResponse](), } srcs.buildEnterpriseSources() return srcs @@ -966,42 +969,46 @@ type TestDataSources struct { FederationStateListMeshGateways *TestDataSource[*structs.DCSpecificRequest, *structs.DatacenterIndexedCheckServiceNodes] Datacenters *TestDataSource[*structs.DatacentersRequest, *[]string] GatewayServices *TestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedGatewayServices] + ServiceGateways *TestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedServiceNodes] Health *TestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedCheckServiceNodes] HTTPChecks *TestDataSource[*cachetype.ServiceHTTPChecksRequest, []structs.CheckType] Intentions *TestDataSource[*structs.ServiceSpecificRequest, structs.Intentions] IntentionUpstreams *TestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedServiceList] + IntentionUpstreamsDestination *TestDataSource[*structs.ServiceSpecificRequest, *structs.IndexedServiceList] InternalServiceDump *TestDataSource[*structs.ServiceDumpRequest, *structs.IndexedNodesWithGateways] LeafCertificate *TestDataSource[*cachetype.ConnectCALeafRequest, *structs.IssuedCert] PeeredUpstreams *TestDataSource[*structs.PartitionSpecificRequest, *structs.IndexedPeeredServiceList] PreparedQuery *TestDataSource[*structs.PreparedQueryExecuteRequest, *structs.PreparedQueryExecuteResponse] ResolvedServiceConfig *TestDataSource[*structs.ServiceConfigRequest, *structs.ServiceConfigResponse] ServiceList *TestDataSource[*structs.DCSpecificRequest, *structs.IndexedServiceList] - TrustBundle *TestDataSource[*pbpeering.TrustBundleReadRequest, *pbpeering.TrustBundleReadResponse] - TrustBundleList *TestDataSource[*pbpeering.TrustBundleListByServiceRequest, *pbpeering.TrustBundleListByServiceResponse] + TrustBundle *TestDataSource[*cachetype.TrustBundleReadRequest, *pbpeering.TrustBundleReadResponse] + TrustBundleList *TestDataSource[*cachetype.TrustBundleListRequest, *pbpeering.TrustBundleListByServiceResponse] TestDataSourcesEnterprise } func (t *TestDataSources) ToDataSources() DataSources { ds := DataSources{ - CARoots: t.CARoots, - CompiledDiscoveryChain: t.CompiledDiscoveryChain, - ConfigEntry: t.ConfigEntry, - ConfigEntryList: t.ConfigEntryList, - Datacenters: t.Datacenters, - GatewayServices: t.GatewayServices, - Health: t.Health, - HTTPChecks: t.HTTPChecks, - Intentions: t.Intentions, - IntentionUpstreams: t.IntentionUpstreams, - InternalServiceDump: t.InternalServiceDump, - LeafCertificate: t.LeafCertificate, - PeeredUpstreams: t.PeeredUpstreams, - PreparedQuery: t.PreparedQuery, - ResolvedServiceConfig: t.ResolvedServiceConfig, - ServiceList: t.ServiceList, - TrustBundle: t.TrustBundle, - TrustBundleList: t.TrustBundleList, + CARoots: t.CARoots, + CompiledDiscoveryChain: t.CompiledDiscoveryChain, + ConfigEntry: t.ConfigEntry, + ConfigEntryList: t.ConfigEntryList, + Datacenters: t.Datacenters, + GatewayServices: t.GatewayServices, + ServiceGateways: t.ServiceGateways, + Health: t.Health, + HTTPChecks: t.HTTPChecks, + Intentions: 
t.Intentions, + IntentionUpstreams: t.IntentionUpstreams, + IntentionUpstreamsDestination: t.IntentionUpstreamsDestination, + InternalServiceDump: t.InternalServiceDump, + LeafCertificate: t.LeafCertificate, + PeeredUpstreams: t.PeeredUpstreams, + PreparedQuery: t.PreparedQuery, + ResolvedServiceConfig: t.ResolvedServiceConfig, + ServiceList: t.ServiceList, + TrustBundle: t.TrustBundle, + TrustBundleList: t.TrustBundleList, } t.fillEnterpriseDataSources(&ds) return ds diff --git a/agent/proxycfg/testing_peering.go b/agent/proxycfg/testing_peering.go index 9b1973c9a..0f20ad6ca 100644 --- a/agent/proxycfg/testing_peering.go +++ b/agent/proxycfg/testing_peering.go @@ -108,3 +108,144 @@ func TestConfigSnapshotPeering(t testing.T) *ConfigSnapshot { }, }) } + +func TestConfigSnapshotPeeringTProxy(t testing.T) *ConfigSnapshot { + // Test two explicitly defined upstreams api-a and noEndpoints + // as well as one implicitly inferred upstream db. + + var ( + noEndpointsUpstream = structs.Upstream{ + DestinationName: "no-endpoints", + DestinationPeer: "peer-a", + LocalBindPort: 1234, + } + noEndpoints = structs.PeeredServiceName{ + ServiceName: structs.NewServiceName("no-endpoints", nil), + Peer: "peer-a", + } + + apiAUpstream = structs.Upstream{ + DestinationName: "api-a", + DestinationPeer: "peer-a", + LocalBindPort: 9090, + } + apiA = structs.PeeredServiceName{ + ServiceName: structs.NewServiceName("api-a", nil), + Peer: "peer-a", + } + + db = structs.PeeredServiceName{ + ServiceName: structs.NewServiceName("db", nil), + Peer: "peer-a", + } + ) + + const peerTrustDomain = "1c053652-8512-4373-90cf-5a7f6263a994.consul" + + return TestConfigSnapshot(t, func(ns *structs.NodeService) { + ns.Proxy.Mode = structs.ProxyModeTransparent + ns.Proxy.Upstreams = []structs.Upstream{ + noEndpointsUpstream, + apiAUpstream, + } + }, []UpdateEvent{ + { + CorrelationID: meshConfigEntryID, + Result: &structs.ConfigEntryResponse{ + Entry: nil, + }, + }, + { + CorrelationID: peeredUpstreamsID, + Result: &structs.IndexedPeeredServiceList{ + Services: []structs.PeeredServiceName{ + apiA, + noEndpoints, + db, // implicitly added here + }, + }, + }, + { + CorrelationID: peerTrustBundleIDPrefix + "peer-a", + Result: &pbpeering.TrustBundleReadResponse{ + Bundle: TestPeerTrustBundles(t).Bundles[0], + }, + }, + { + CorrelationID: upstreamPeerWatchIDPrefix + NewUpstreamID(&noEndpointsUpstream).String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: []structs.CheckServiceNode{}, + }, + }, + { + CorrelationID: upstreamPeerWatchIDPrefix + NewUpstreamID(&apiAUpstream).String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "node1", + Address: "127.0.0.1", + PeerName: "peer-a", + }, + Service: &structs.NodeService{ + ID: "api-a-1", + Service: "api-a", + PeerName: "peer-a", + Address: "1.2.3.4", + TaggedAddresses: map[string]structs.ServiceAddress{ + "virtual": {Address: "10.0.0.1"}, + structs.TaggedAddressVirtualIP: {Address: "240.0.0.1"}, + }, + Connect: structs.ServiceConnect{ + PeerMeta: &structs.PeeringServiceMeta{ + SNI: []string{ + "api-a.default.default.cloud.external." 
+ peerTrustDomain, + }, + SpiffeID: []string{ + "spiffe://" + peerTrustDomain + "/ns/default/dc/cloud-dc/svc/api-a", + }, + Protocol: "tcp", + }, + }, + }, + }, + }, + }, + }, + { + CorrelationID: upstreamPeerWatchIDPrefix + NewUpstreamIDFromPeeredServiceName(db).String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "node1", + Address: "127.0.0.1", + PeerName: "peer-a", + }, + Service: &structs.NodeService{ + ID: "db-1", + Service: "db", + PeerName: "peer-a", + Address: "2.3.4.5", // Expect no endpoint or listener for this address + TaggedAddresses: map[string]structs.ServiceAddress{ + "virtual": {Address: "10.0.0.2"}, + structs.TaggedAddressVirtualIP: {Address: "240.0.0.2"}, + }, + Connect: structs.ServiceConnect{ + PeerMeta: &structs.PeeringServiceMeta{ + SNI: []string{ + "db.default.default.cloud.external." + peerTrustDomain, + }, + SpiffeID: []string{ + "spiffe://" + peerTrustDomain + "/ns/default/dc/cloud-dc/svc/db", + }, + Protocol: "tcp", + }, + }, + }, + }, + }, + }, + }, + }) +} diff --git a/agent/proxycfg/testing_terminating_gateway.go b/agent/proxycfg/testing_terminating_gateway.go index 00771433b..fda659173 100644 --- a/agent/proxycfg/testing_terminating_gateway.go +++ b/agent/proxycfg/testing_terminating_gateway.go @@ -328,8 +328,10 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t testing.T, populateDesti roots, _ := TestCerts(t) var ( - externalIPTCP = structs.NewServiceName("external-IP-TCP", nil) - externalHostnameTCP = structs.NewServiceName("external-hostname-TCP", nil) + externalIPTCP = structs.NewServiceName("external-IP-TCP", nil) + externalHostnameTCP = structs.NewServiceName("external-hostname-TCP", nil) + externalIPHTTP = structs.NewServiceName("external-IP-HTTP", nil) + externalHostnameHTTP = structs.NewServiceName("external-hostname-HTTP", nil) ) baseEvents := []UpdateEvent{ @@ -357,6 +359,14 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t testing.T, populateDesti Service: externalHostnameTCP, ServiceKind: structs.GatewayServiceKindDestination, }, + &structs.GatewayService{ + Service: externalIPHTTP, + ServiceKind: structs.GatewayServiceKindDestination, + }, + &structs.GatewayService{ + Service: externalHostnameHTTP, + ServiceKind: structs.GatewayServiceKindDestination, + }, ) baseEvents = testSpliceEvents(baseEvents, []UpdateEvent{ @@ -375,6 +385,14 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t testing.T, populateDesti CorrelationID: serviceIntentionsIDPrefix + externalHostnameTCP.String(), Result: structs.Intentions{}, }, + { + CorrelationID: serviceIntentionsIDPrefix + externalIPHTTP.String(), + Result: structs.Intentions{}, + }, + { + CorrelationID: serviceIntentionsIDPrefix + externalHostnameHTTP.String(), + Result: structs.Intentions{}, + }, // ======== { CorrelationID: serviceLeafIDPrefix + externalIPTCP.String(), @@ -390,6 +408,20 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t testing.T, populateDesti PrivateKeyPEM: "placeholder.key", }, }, + { + CorrelationID: serviceLeafIDPrefix + externalIPHTTP.String(), + Result: &structs.IssuedCert{ + CertPEM: "placeholder.crt", + PrivateKeyPEM: "placeholder.key", + }, + }, + { + CorrelationID: serviceLeafIDPrefix + externalHostnameHTTP.String(), + Result: &structs.IssuedCert{ + CertPEM: "placeholder.crt", + PrivateKeyPEM: "placeholder.key", + }, + }, // ======== { CorrelationID: serviceConfigIDPrefix + externalIPTCP.String(), @@ -397,8 +429,12 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t 
testing.T, populateDesti Mode: structs.ProxyModeTransparent, ProxyConfig: map[string]interface{}{"protocol": "tcp"}, Destination: structs.DestinationConfig{ - Address: "192.168.0.1", - Port: 80, + Addresses: []string{ + "192.168.0.1", + "192.168.0.2", + "192.168.0.3", + }, + Port: 80, }, }, }, @@ -408,8 +444,33 @@ func TestConfigSnapshotTerminatingGatewayDestinations(t testing.T, populateDesti Mode: structs.ProxyModeTransparent, ProxyConfig: map[string]interface{}{"protocol": "tcp"}, Destination: structs.DestinationConfig{ - Address: "*.hashicorp.com", - Port: 8089, + Addresses: []string{ + "api.hashicorp.com", + "web.hashicorp.com", + }, + Port: 8089, + }, + }, + }, + { + CorrelationID: serviceConfigIDPrefix + externalIPHTTP.String(), + Result: &structs.ServiceConfigResponse{ + Mode: structs.ProxyModeTransparent, + ProxyConfig: map[string]interface{}{"protocol": "http"}, + Destination: structs.DestinationConfig{ + Addresses: []string{"192.168.0.2"}, + Port: 80, + }, + }, + }, + { + CorrelationID: serviceConfigIDPrefix + externalHostnameHTTP.String(), + Result: &structs.ServiceConfigResponse{ + Mode: structs.ProxyModeTransparent, + ProxyConfig: map[string]interface{}{"protocol": "http"}, + Destination: structs.DestinationConfig{ + Addresses: []string{"httpbin.org"}, + Port: 80, }, }, }, diff --git a/agent/proxycfg/testing_tproxy.go b/agent/proxycfg/testing_tproxy.go index b93e6c970..45d0236a0 100644 --- a/agent/proxycfg/testing_tproxy.go +++ b/agent/proxycfg/testing_tproxy.go @@ -1,6 +1,7 @@ package proxycfg import ( + "github.com/hashicorp/consul/api" "time" "github.com/mitchellh/go-testing-interface" @@ -522,3 +523,135 @@ func TestConfigSnapshotTransparentProxyTerminatingGatewayCatalogDestinationsOnly }, }) } + +func TestConfigSnapshotTransparentProxyDestination(t testing.T) *ConfigSnapshot { + // DiscoveryChain without an UpstreamConfig should yield a + // filter chain when in transparent proxy mode + var ( + google = structs.NewServiceName("google", nil) + googleUID = NewUpstreamIDFromServiceName(google) + googleCE = structs.ServiceConfigEntry{ + Name: "google", + Destination: &structs.DestinationConfig{ + Addresses: []string{ + "www.google.com", + "api.google.com", + }, + Port: 443, + }, + } + + kafka = structs.NewServiceName("kafka", nil) + kafkaUID = NewUpstreamIDFromServiceName(kafka) + kafkaCE = structs.ServiceConfigEntry{ + Name: "kafka", + Destination: &structs.DestinationConfig{ + Addresses: []string{ + "192.168.2.1", + "192.168.2.2", + }, + Port: 9093, + }, + } + ) + + return TestConfigSnapshot(t, func(ns *structs.NodeService) { + ns.Proxy.Mode = structs.ProxyModeTransparent + }, []UpdateEvent{ + { + CorrelationID: meshConfigEntryID, + Result: &structs.ConfigEntryResponse{ + Entry: &structs.MeshConfigEntry{ + TransparentProxy: structs.TransparentProxyMeshConfig{ + MeshDestinationsOnly: true, + }, + }, + }, + }, + { + CorrelationID: intentionUpstreamsDestinationID, + Result: &structs.IndexedServiceList{ + Services: structs.ServiceList{ + google, + kafka, + }, + }, + }, + { + CorrelationID: DestinationConfigEntryID + googleUID.String(), + Result: &structs.ConfigEntryResponse{ + Entry: &googleCE, + }, + }, + { + CorrelationID: DestinationConfigEntryID + kafkaUID.String(), + Result: &structs.ConfigEntryResponse{ + Entry: &kafkaCE, + }, + }, + { + CorrelationID: DestinationGatewayID + googleUID.String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "node1", + Address: "172.168.0.1", + Datacenter: "dc1", + }, + 
Service: &structs.NodeService{ + ID: "tgtw1", + Address: "172.168.0.1", + Port: 8443, + Kind: structs.ServiceKindTerminatingGateway, + TaggedAddresses: map[string]structs.ServiceAddress{ + structs.TaggedAddressLANIPv4: {Address: "172.168.0.1", Port: 8443}, + structs.TaggedAddressVirtualIP: {Address: "240.0.0.1"}, + }, + }, + Checks: []*structs.HealthCheck{ + { + Node: "node1", + ServiceName: "tgtw", + Name: "force", + Status: api.HealthPassing, + }, + }, + }, + }, + }, + }, + { + CorrelationID: DestinationGatewayID + kafkaUID.String(), + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "node1", + Address: "172.168.0.1", + Datacenter: "dc1", + }, + Service: &structs.NodeService{ + ID: "tgtw1", + Address: "172.168.0.1", + Port: 8443, + Kind: structs.ServiceKindTerminatingGateway, + TaggedAddresses: map[string]structs.ServiceAddress{ + structs.TaggedAddressLANIPv4: {Address: "172.168.0.1", Port: 8443}, + structs.TaggedAddressVirtualIP: {Address: "240.0.0.1"}, + }, + }, + Checks: []*structs.HealthCheck{ + { + Node: "node1", + ServiceName: "tgtw", + Name: "force", + Status: api.HealthPassing, + }, + }, + }, + }, + }, + }, + }) +} diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go index a47510543..600a89e09 100644 --- a/agent/proxycfg/upstreams.go +++ b/agent/proxycfg/upstreams.go @@ -436,7 +436,17 @@ type discoveryChainWatchOpts struct { } func (s *handlerUpstreams) watchDiscoveryChain(ctx context.Context, snap *ConfigSnapshot, opts discoveryChainWatchOpts) error { - if _, ok := snap.ConnectProxy.WatchedDiscoveryChains[opts.id]; ok { + var watchedDiscoveryChains map[UpstreamID]context.CancelFunc + switch s.kind { + case structs.ServiceKindIngressGateway: + watchedDiscoveryChains = snap.IngressGateway.WatchedDiscoveryChains + case structs.ServiceKindConnectProxy: + watchedDiscoveryChains = snap.ConnectProxy.WatchedDiscoveryChains + default: + return fmt.Errorf("unsupported kind %s", s.kind) + } + + if _, ok := watchedDiscoveryChains[opts.id]; ok { return nil } @@ -457,16 +467,7 @@ func (s *handlerUpstreams) watchDiscoveryChain(ctx context.Context, snap *Config return err } - switch s.kind { - case structs.ServiceKindIngressGateway: - snap.IngressGateway.WatchedDiscoveryChains[opts.id] = cancel - case structs.ServiceKindConnectProxy: - snap.ConnectProxy.WatchedDiscoveryChains[opts.id] = cancel - default: - cancel() - return fmt.Errorf("unsupported kind %s", s.kind) - } - + watchedDiscoveryChains[opts.id] = cancel return nil } diff --git a/agent/rpc/peering/service.go b/agent/rpc/peering/service.go index 94b7d73a3..86a8b11dc 100644 --- a/agent/rpc/peering/service.go +++ b/agent/rpc/peering/service.go @@ -13,11 +13,14 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" grpcstatus "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/acl/resolver" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/stream" "github.com/hashicorp/consul/agent/dns" + external "github.com/hashicorp/consul/agent/grpc-external" "github.com/hashicorp/consul/agent/grpc-external/services/peerstream" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/lib" @@ -42,6 +45,20 @@ func (e *errPeeringInvalidServerAddress) Error() string { return fmt.Sprintf("%s is not a valid peering server address", e.addr) } +// For private/internal gRPC handlers, protoc-gen-rpc-glue generates the +// requisite 
methods to satisfy the structs.RPCInfo interface using fields +// from the pbcommon package. This service is public, so we can't use those +// fields in our proto definition. Instead, we construct our RPCInfo manually. +var writeRequest struct { + structs.WriteRequest + structs.DCSpecificRequest +} + +var readRequest struct { + structs.QueryOptions + structs.DCSpecificRequest +} + // Server implements pbpeering.PeeringService to provide RPC operations for // managing peering relationships. type Server struct { @@ -55,6 +72,7 @@ type Config struct { ForwardRPC func(structs.RPCInfo, func(*grpc.ClientConn) error) (bool, error) Datacenter string ConnectEnabled bool + PeeringEnabled bool } func NewServer(cfg Config) *Server { @@ -88,6 +106,12 @@ func (s *Server) Register(grpcServer *grpc.Server) { // providing access to CA data and the RPC system for forwarding requests to // other servers. type Backend interface { + // ResolveTokenAndDefaultMeta returns an acl.Authorizer which authorizes + // actions based on the permissions granted to the token. + // If either entMeta or authzContext are non-nil they will be populated with the + // partition and namespace from the token. + ResolveTokenAndDefaultMeta(token string, entMeta *acl.EnterpriseMeta, authzCtx *acl.AuthorizerContext) (resolver.Result, error) + // GetAgentCACertificates returns the CA certificate to be returned in the peering token data GetAgentCACertificates() ([]string, error) @@ -138,6 +162,8 @@ type Store interface { TrustBundleListByService(ws memdb.WatchSet, service, dc string, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.PeeringTrustBundle, error) } +var peeringNotEnabledErr = grpcstatus.Error(codes.FailedPrecondition, "peering must be enabled to use this endpoint") + // GenerateToken implements the PeeringService RPC method to generate a // peering token which is the initial step in establishing a peering relationship // with other Consul clusters. @@ -145,6 +171,10 @@ func (s *Server) GenerateToken( ctx context.Context, req *pbpeering.GenerateTokenRequest, ) (*pbpeering.GenerateTokenResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } @@ -157,11 +187,11 @@ func (s *Server) GenerateToken( return nil, fmt.Errorf("meta tags failed validation: %w", err) } - // TODO(peering): add metrics - // TODO(peering): add tracing + defer metrics.MeasureSince([]string{"peering", "generate_token"}, time.Now()) resp := &pbpeering.GenerateTokenResponse{} - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).GenerateToken(ctx, req) return err @@ -170,55 +200,78 @@ func (s *Server) GenerateToken( return resp, err } + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil { + return nil, err + } + + var peering *pbpeering.Peering + + // This loop ensures at most one retry in the case of a race condition. 
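The retry logic that follows is worth calling out: two servers can race to create the same peering, each generating its own ID, and the loser must re-read the store and adopt the record the winner persisted. As a rough, self-contained sketch of that control flow only, using a hypothetical `peering` struct and `fetchPeering`/`writePeering` stand-ins rather than the real `Backend`/state-store API:

```go
package main

import (
	"errors"
	"fmt"
)

// peering and the map below are hypothetical stand-ins for the real
// pbpeering.Peering type and the state store.
type peering struct{ id, name string }

var store = map[string]*peering{}

var errAlreadyExists = errors.New("a peering already exists with the name")

func fetchPeering(name string) *peering { return store[name] }

func writePeering(p *peering) error {
	if existing, ok := store[p.name]; ok && existing.id != p.id {
		return errAlreadyExists
	}
	store[p.name] = p
	return nil
}

// createOrFetch mirrors the retry-once control flow: generate a candidate,
// try to write it, and if another writer won the race, loop exactly once
// more to adopt the record that was persisted first.
func createOrFetch(name, candidateID string) (*peering, error) {
	var p *peering
	for canRetry := true; canRetry; canRetry = false {
		if p = fetchPeering(name); p == nil {
			p = &peering{id: candidateID, name: name}
		}
		if err := writePeering(p); err != nil {
			if errors.Is(err, errAlreadyExists) {
				continue // lost the race: re-fetch and keep the winner's record
			}
			return nil, err
		}
		break
	}
	return p, nil
}

func main() {
	store["peerB"] = &peering{id: "winner", name: "peerB"}
	p, _ := createOrFetch("peerB", "loser")
	fmt.Println(p.id) // "winner": the second caller adopts the existing record
}
```

The handler in this diff does the same thing against the real state store, with the `canRetry` guard ensuring the loop body runs at most twice.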
+ for canRetry := true; canRetry; canRetry = false { + peering, err = s.getExistingPeering(req.PeerName, entMeta.PartitionOrDefault()) + if err != nil { + return nil, err + } + + if peering == nil { + id, err := lib.GenerateUUID(s.Backend.CheckPeeringUUID) + if err != nil { + return resp, err + } + peering = &pbpeering.Peering{ + ID: id, + Name: req.PeerName, + Meta: req.Meta, + + // PartitionOrEmpty is used to avoid writing "default" in OSS. + Partition: entMeta.PartitionOrEmpty(), + } + } else { + // validate that this peer name is not being used as a dialer already + if err := validatePeer(peering, false); err != nil { + return nil, err + } + } + writeReq := pbpeering.PeeringWriteRequest{ + Peering: peering, + } + if err := s.Backend.PeeringWrite(&writeReq); err != nil { + // There's a possible race where two servers call Generate Token at the + // same time with the same peer name for the first time. They both + // generate an ID and try to insert and only one wins. This detects the + // collision and forces the loser to discard its generated ID and use + // the one from the other server. + if strings.Contains(err.Error(), "A peering already exists with the name") { + // retry to fetch existing peering + continue + } + return nil, fmt.Errorf("failed to write peering: %w", err) + } + // write succeeded, break loop early + break + } + ca, err := s.Backend.GetAgentCACertificates() if err != nil { return nil, err } - serverAddrs, err := s.Backend.GetServerAddresses() - if err != nil { - return nil, err - } - - canRetry := true -RETRY_ONCE: - id, err := s.getExistingOrCreateNewPeerID(req.PeerName, req.Partition) - if err != nil { - return nil, err - } - - writeReq := pbpeering.PeeringWriteRequest{ - Peering: &pbpeering.Peering{ - ID: id, - Name: req.PeerName, - // TODO(peering): Normalize from ACL token once this endpoint is guarded by ACLs. - Partition: req.PartitionOrDefault(), - Meta: req.Meta, - }, - } - if err := s.Backend.PeeringWrite(&writeReq); err != nil { - // There's a possible race where two servers call Generate Token at the - // same time with the same peer name for the first time. They both - // generate an ID and try to insert and only one wins. This detects the - // collision and forces the loser to discard its generated ID and use - // the one from the other server. - if canRetry && strings.Contains(err.Error(), "A peering already exists with the name") { - canRetry = false - goto RETRY_ONCE + // ServerExternalAddresses must be formatted as addr:port. 
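The comment above requires `addr:port` formatting for `ServerExternalAddresses`; this hunk does not show how such a string might be validated, so purely as an illustrative aside, a standard-library check could look like the following (hypothetical `validateAddrPort`, not code from this change):

```go
package main

import (
	"fmt"
	"net"
	"strconv"
)

// validateAddrPort sketches one way to check that an external server address
// is in "addr:port" form. Illustrative only; not part of this change.
func validateAddrPort(s string) error {
	host, port, err := net.SplitHostPort(s)
	if err != nil {
		return fmt.Errorf("%q is not in addr:port form: %w", s, err)
	}
	if host == "" {
		return fmt.Errorf("%q is missing a host", s)
	}
	p, err := strconv.Atoi(port)
	if err != nil || p < 1 || p > 65535 {
		return fmt.Errorf("%q has an invalid port", s)
	}
	return nil
}

func main() {
	for _, addr := range []string{"32.1.2.3:8502", "example.com", "10.0.0.1:0"} {
		fmt.Println(addr, "->", validateAddrPort(addr))
	}
}
```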
+ var serverAddrs []string + if len(req.ServerExternalAddresses) > 0 { + serverAddrs = req.ServerExternalAddresses + } else { + serverAddrs, err = s.Backend.GetServerAddresses() + if err != nil { + return nil, err + } - return nil, fmt.Errorf("failed to write peering: %w", err) - } - - q := state.Query{ - Value: strings.ToLower(req.PeerName), - EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(req.Partition), - } - _, peering, err := s.Backend.Store().PeeringRead(nil, q) - if err != nil { - return nil, err - } - if peering == nil { - return nil, fmt.Errorf("peering was deleted while token generation request was in flight") } tok := structs.PeeringToken{ @@ -244,6 +297,10 @@ func (s *Server) Establish( ctx context.Context, req *pbpeering.EstablishRequest, ) (*pbpeering.EstablishResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + // validate prior to forwarding to the leader, this saves a network hop if err := dns.ValidateLabel(req.PeerName); err != nil { return nil, fmt.Errorf("%s is not a valid peer name: %w", req.PeerName, err) @@ -261,7 +318,8 @@ func (s *Server) Establish( } resp := &pbpeering.EstablishResponse{} - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).Establish(ctx, req) return err @@ -272,17 +330,48 @@ func (s *Server) Establish( } defer metrics.MeasureSince([]string{"peering", "establish"}, time.Now()) + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil { + return nil, err + } + + peering, err := s.getExistingPeering(req.PeerName, entMeta.PartitionOrDefault()) + if err != nil { + return nil, err + } + + // We don't default req.Partition here, unlike other peering endpoints, because partitions are empty in OSS. + if err := s.validatePeeringInPartition(tok.PeerID, req.Partition); err != nil { + return nil, err + } + + var id string + if peering == nil { + id, err = lib.GenerateUUID(s.Backend.CheckPeeringUUID) + if err != nil { + return nil, err + } + } else { + id = peering.ID + } + + // validate that this peer name is not being used as an acceptor already + if err := validatePeer(peering, true); err != nil { + return nil, err + } + // convert ServiceAddress values to strings serverAddrs := make([]string, len(tok.ServerAddresses)) for i, addr := range tok.ServerAddresses { serverAddrs[i] = addr } - id, err := s.getExistingOrCreateNewPeerID(req.PeerName, req.Partition) - if err != nil { - return nil, err - } - // as soon as a peering is written with a list of ServerAddresses that is // non-empty, the leader routine will see the peering and attempt to // establish a connection with the remote peer. @@ -299,22 +388,47 @@ func (s *Server) Establish( PeerID: tok.PeerID, Meta: req.Meta, State: pbpeering.PeeringState_ESTABLISHING, + + // PartitionOrEmpty is used to avoid writing "default" in OSS. 
+ Partition: entMeta.PartitionOrEmpty(), }, } - if err = s.Backend.PeeringWrite(writeReq); err != nil { + if err := s.Backend.PeeringWrite(writeReq); err != nil { return nil, fmt.Errorf("failed to write peering: %w", err) } - // resp.Status == 0 + // TODO(peering): low prio: consider adding response details return resp, nil } +// validatePeeringInPartition makes sure that we don't create a peering in the same partition. We validate by looking at +// the remotePeerID from the PeeringToken and looking up for a peering in the partition. If there is one and the +// request partition is the same, then we are attempting to peer within the partition, which we shouldn't. +func (s *Server) validatePeeringInPartition(remotePeerID, partition string) error { + _, peering, err := s.Backend.Store().PeeringReadByID(nil, remotePeerID) + if err != nil { + return fmt.Errorf("cannot read peering by ID: %w", err) + } + + if peering != nil && peering.Partition == partition { + return fmt.Errorf("cannot create a peering within the same partition (ENT) or cluster (OSS)") + } + + return nil +} + +// OPTIMIZE: Handle blocking queries func (s *Server) PeeringRead(ctx context.Context, req *pbpeering.PeeringReadRequest) (*pbpeering.PeeringReadResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } var resp *pbpeering.PeeringReadResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringRead(ctx, req) return err @@ -324,12 +438,22 @@ func (s *Server) PeeringRead(ctx context.Context, req *pbpeering.PeeringReadRequ } defer metrics.MeasureSince([]string{"peering", "read"}, time.Now()) - // TODO(peering): ACL check request token - // TODO(peering): handle blocking queries + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().PeeringReadAllowed(&authzCtx); err != nil { + return nil, err + } + q := state.Query{ Value: strings.ToLower(req.Name), - EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(req.Partition)} + EnterpriseMeta: *entMeta, + } _, peering, err := s.Backend.Store().PeeringRead(nil, q) if err != nil { return nil, err @@ -337,17 +461,24 @@ func (s *Server) PeeringRead(ctx context.Context, req *pbpeering.PeeringReadRequ if peering == nil { return &pbpeering.PeeringReadResponse{Peering: nil}, nil } - cp := copyPeeringWithNewState(peering, s.reconciledStreamStateHint(peering.ID, peering.State)) + + cp := s.reconcilePeering(peering) return &pbpeering.PeeringReadResponse{Peering: cp}, nil } +// OPTIMIZE: Handle blocking queries func (s *Server) PeeringList(ctx context.Context, req *pbpeering.PeeringListRequest) (*pbpeering.PeeringListResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } var resp *pbpeering.PeeringListResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err 
:= s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringList(ctx, req) return err @@ -356,11 +487,20 @@ func (s *Server) PeeringList(ctx context.Context, req *pbpeering.PeeringListRequ return resp, err } - defer metrics.MeasureSince([]string{"peering", "list"}, time.Now()) - // TODO(peering): ACL check request token + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } - // TODO(peering): handle blocking queries - _, peerings, err := s.Backend.Store().PeeringList(nil, *structs.NodeEnterpriseMetaInPartition(req.Partition)) + if err := authz.ToAllowAuthorizer().PeeringReadAllowed(&authzCtx); err != nil { + return nil, err + } + + defer metrics.MeasureSince([]string{"peering", "list"}, time.Now()) + + _, peerings, err := s.Backend.Store().PeeringList(nil, *entMeta) if err != nil { return nil, err } @@ -368,35 +508,56 @@ func (s *Server) PeeringList(ctx context.Context, req *pbpeering.PeeringListRequ // reconcile the actual peering state; need to copy over the ds for peering var cPeerings []*pbpeering.Peering for _, p := range peerings { - cp := copyPeeringWithNewState(p, s.reconciledStreamStateHint(p.ID, p.State)) + cp := s.reconcilePeering(p) cPeerings = append(cPeerings, cp) } + return &pbpeering.PeeringListResponse{Peerings: cPeerings}, nil } -// TODO(peering): Maybe get rid of this when actually monitoring the stream health -// reconciledStreamStateHint peaks into the streamTracker and determines whether a peering should be marked -// as PeeringState.Active or not -func (s *Server) reconciledStreamStateHint(pID string, pState pbpeering.PeeringState) pbpeering.PeeringState { - streamState, found := s.Tracker.StreamStatus(pID) +// TODO(peering): Get rid of this func when we stop using the stream tracker for imported/ exported services and the peering state +// reconcilePeering enriches the peering with the following information: +// -- PeeringState.Active if the peering is active +// -- ImportedServicesCount and ExportedServicesCount +// NOTE: we return a new peering with this additional data +func (s *Server) reconcilePeering(peering *pbpeering.Peering) *pbpeering.Peering { + streamState, found := s.Tracker.StreamStatus(peering.ID) + if !found { + s.Logger.Warn("did not find peer in stream tracker; cannot populate imported and"+ + " exported services count or reconcile peering state", "peerID", peering.ID) + return peering + } else { + cp := copyPeering(peering) - if found && streamState.Connected { - return pbpeering.PeeringState_ACTIVE + // reconcile pbpeering.PeeringState_Active + if streamState.Connected { + cp.State = pbpeering.PeeringState_ACTIVE + } else if streamState.DisconnectErrorMessage != "" { + cp.State = pbpeering.PeeringState_FAILING + } + + // add imported & exported services counts + cp.ImportedServiceCount = streamState.GetImportedServicesCount() + cp.ExportedServiceCount = streamState.GetExportedServicesCount() + + return cp } - - // default, no reconciliation - return pState } // TODO(peering): As of writing, this method is only used in tests to set up Peerings in the state store. 
// Consider removing if we can find another way to populate state store in peering_endpoint_test.go func (s *Server) PeeringWrite(ctx context.Context, req *pbpeering.PeeringWriteRequest) (*pbpeering.PeeringWriteResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Peering.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } var resp *pbpeering.PeeringWriteResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringWrite(ctx, req) return err @@ -406,19 +567,37 @@ func (s *Server) PeeringWrite(ctx context.Context, req *pbpeering.PeeringWriteRe } defer metrics.MeasureSince([]string{"peering", "write"}, time.Now()) - // TODO(peering): ACL check request token + + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Peering.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil { + return nil, err + } if req.Peering == nil { return nil, fmt.Errorf("missing required peering body") } - id, err := s.getExistingOrCreateNewPeerID(req.Peering.Name, req.Peering.Partition) + var id string + peering, err := s.getExistingPeering(req.Peering.Name, entMeta.PartitionOrDefault()) if err != nil { return nil, err } + if peering == nil { + id, err = lib.GenerateUUID(s.Backend.CheckPeeringUUID) + if err != nil { + return nil, err + } + } else { + id = peering.ID + } req.Peering.ID = id - // TODO(peering): handle blocking queries err = s.Backend.PeeringWrite(req) if err != nil { return nil, err @@ -427,12 +606,17 @@ func (s *Server) PeeringWrite(ctx context.Context, req *pbpeering.PeeringWriteRe } func (s *Server) PeeringDelete(ctx context.Context, req *pbpeering.PeeringDeleteRequest) (*pbpeering.PeeringDeleteResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } var resp *pbpeering.PeeringDeleteResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringDelete(ctx, req) return err @@ -442,13 +626,21 @@ func (s *Server) PeeringDelete(ctx context.Context, req *pbpeering.PeeringDelete } defer metrics.MeasureSince([]string{"peering", "delete"}, time.Now()) - // TODO(peering): ACL check request token - // TODO(peering): handle blocking queries + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil { + return nil, err + } q := state.Query{ Value: strings.ToLower(req.Name), - EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(req.Partition), + EnterpriseMeta: *entMeta, } _, existing, err 
:= s.Backend.Store().PeeringRead(nil, q) if err != nil { @@ -470,9 +662,11 @@ func (s *Server) PeeringDelete(ctx context.Context, req *pbpeering.PeeringDelete // for deletion the peering is effectively gone. ID: existing.ID, Name: req.Name, - Partition: req.Partition, State: pbpeering.PeeringState_DELETING, DeletedAt: structs.TimeToProto(time.Now().UTC()), + + // PartitionOrEmpty is used to avoid writing "default" in OSS. + Partition: entMeta.PartitionOrEmpty(), }, } err = s.Backend.PeeringWrite(writeReq) @@ -482,13 +676,19 @@ func (s *Server) PeeringDelete(ctx context.Context, req *pbpeering.PeeringDelete return &pbpeering.PeeringDeleteResponse{}, nil } +// OPTIMIZE: Handle blocking queries func (s *Server) TrustBundleRead(ctx context.Context, req *pbpeering.TrustBundleReadRequest) (*pbpeering.TrustBundleReadResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } var resp *pbpeering.TrustBundleReadResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).TrustBundleRead(ctx, req) return err @@ -498,13 +698,21 @@ func (s *Server) TrustBundleRead(ctx context.Context, req *pbpeering.TrustBundle } defer metrics.MeasureSince([]string{"peering", "trust_bundle_read"}, time.Now()) - // TODO(peering): ACL check request token - // TODO(peering): handle blocking queries + var authzCtx acl.AuthorizerContext + entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().ServiceWriteAnyAllowed(&authzCtx); err != nil { + return nil, err + } idx, trustBundle, err := s.Backend.Store().PeeringTrustBundleRead(nil, state.Query{ Value: req.Name, - EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(req.Partition), + EnterpriseMeta: *entMeta, }) if err != nil { return nil, fmt.Errorf("failed to read trust bundle for peer %s: %w", req.Name, err) @@ -517,16 +725,25 @@ func (s *Server) TrustBundleRead(ctx context.Context, req *pbpeering.TrustBundle } // TODO(peering): rename rpc & request/response to drop the "service" part +// OPTIMIZE: Handle blocking queries func (s *Server) TrustBundleListByService(ctx context.Context, req *pbpeering.TrustBundleListByServiceRequest) (*pbpeering.TrustBundleListByServiceResponse, error) { + if !s.Config.PeeringEnabled { + return nil, peeringNotEnabledErr + } + if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } if err := s.Backend.EnterpriseCheckNamespaces(req.Namespace); err != nil { return nil, grpcstatus.Error(codes.InvalidArgument, err.Error()) } + if req.ServiceName == "" { + return nil, errors.New("missing service name") + } var resp *pbpeering.TrustBundleListByServiceResponse - handled, err := s.ForwardRPC(req, func(conn *grpc.ClientConn) error { + handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error { + ctx := external.ForwardMetadataContext(ctx) var err error resp, err = pbpeering.NewPeeringServiceClient(conn).TrustBundleListByService(ctx, req) return err @@ -536,11 +753,17 @@ 
func (s *Server) TrustBundleListByService(ctx context.Context, req *pbpeering.Tr } defer metrics.MeasureSince([]string{"peering", "trust_bundle_list_by_service"}, time.Now()) - // TODO(peering): ACL check request token for service:write on the service name - - // TODO(peering): handle blocking queries + var authzCtx acl.AuthorizerContext entMeta := acl.NewEnterpriseMetaWithPartition(req.Partition, req.Namespace) + authz, err := s.Backend.ResolveTokenAndDefaultMeta(external.TokenFromContext(ctx), &entMeta, &authzCtx) + if err != nil { + return nil, err + } + + if err := authz.ToAllowAuthorizer().ServiceWriteAllowed(req.ServiceName, &authzCtx); err != nil { + return nil, err + } var ( idx uint64 @@ -548,10 +771,10 @@ func (s *Server) TrustBundleListByService(ctx context.Context, req *pbpeering.Tr ) switch { - case req.ServiceName != "": - idx, bundles, err = s.Backend.Store().TrustBundleListByService(nil, req.ServiceName, s.Datacenter, entMeta) case req.Kind == string(structs.ServiceKindMeshGateway): idx, bundles, err = s.Backend.Store().PeeringTrustBundleList(nil, entMeta) + case req.ServiceName != "": + idx, bundles, err = s.Backend.Store().TrustBundleListByService(nil, req.ServiceName, s.Datacenter, entMeta) case req.Kind != "": return nil, grpcstatus.Error(codes.InvalidArgument, "kind must be mesh-gateway if set") default: @@ -564,40 +787,39 @@ func (s *Server) TrustBundleListByService(ctx context.Context, req *pbpeering.Tr return &pbpeering.TrustBundleListByServiceResponse{Index: idx, Bundles: bundles}, nil } -func (s *Server) getExistingOrCreateNewPeerID(peerName, partition string) (string, error) { +func (s *Server) getExistingPeering(peerName, partition string) (*pbpeering.Peering, error) { q := state.Query{ Value: strings.ToLower(peerName), EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(partition), } _, peering, err := s.Backend.Store().PeeringRead(nil, q) if err != nil { - return "", err - } - if peering != nil { - return peering.ID, nil + return nil, err } - id, err := lib.GenerateUUID(s.Backend.CheckPeeringUUID) - if err != nil { - return "", err - } - return id, nil + return peering, nil } -func copyPeeringWithNewState(p *pbpeering.Peering, state pbpeering.PeeringState) *pbpeering.Peering { - return &pbpeering.Peering{ - ID: p.ID, - Name: p.Name, - Partition: p.Partition, - DeletedAt: p.DeletedAt, - Meta: p.Meta, - PeerID: p.PeerID, - PeerCAPems: p.PeerCAPems, - PeerServerAddresses: p.PeerServerAddresses, - PeerServerName: p.PeerServerName, - CreateIndex: p.CreateIndex, - ModifyIndex: p.ModifyIndex, - - State: state, +// validatePeer enforces the following rule for an existing peering: +// - if a peering already exists, it can only be used as an acceptor or dialer +// +// We define a DIALER as a peering that has server addresses (or a peering that is created via the Establish endpoint) +// Conversely, we define an ACCEPTOR as a peering that is created via the GenerateToken endpoint +func validatePeer(peering *pbpeering.Peering, shouldDial bool) error { + if peering != nil && peering.ShouldDial() != shouldDial { + if shouldDial { + return fmt.Errorf("cannot create peering with name: %q; there is an existing peering expecting to be dialed", peering.Name) + } else { + return fmt.Errorf("cannot create peering with name: %q; there is already an established peering", peering.Name) + } } + + return nil +} + +func copyPeering(p *pbpeering.Peering) *pbpeering.Peering { + var copyP pbpeering.Peering + proto.Merge(©P, p) + + return ©P } diff --git 
a/agent/rpc/peering/service_test.go b/agent/rpc/peering/service_test.go index 6a8f32915..883f4fcc0 100644 --- a/agent/rpc/peering/service_test.go +++ b/agent/rpc/peering/service_test.go @@ -15,11 +15,14 @@ import ( "github.com/hashicorp/go-uuid" "github.com/stretchr/testify/require" gogrpc "google.golang.org/grpc" + "google.golang.org/grpc/codes" + grpcstatus "google.golang.org/grpc/status" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/stream" + external "github.com/hashicorp/consul/agent/grpc-external" grpc "github.com/hashicorp/consul/agent/grpc-internal" "github.com/hashicorp/consul/agent/grpc-internal/resolver" "github.com/hashicorp/consul/agent/pool" @@ -39,6 +42,13 @@ import ( "github.com/hashicorp/consul/types" ) +const ( + testTokenPeeringReadSecret = "9a83c138-a0c7-40f1-89fa-6acf9acd78f5" + testTokenPeeringWriteSecret = "91f90a41-0840-4afe-b615-68745f9e16c1" + testTokenServiceReadSecret = "1ef8e3cf-6e95-49aa-9f73-a0d3ad1a77d4" + testTokenServiceWriteSecret = "4a3dc05d-d86c-4f20-be43-8f4f8f045fea" +) + func generateTooManyMetaKeys() map[string]string { // todo -- modularize in structs.go or testing.go tooMuchMeta := make(map[string]string) @@ -59,7 +69,7 @@ func TestPeeringService_GenerateToken(t *testing.T) { // TODO(peering): see note on newTestServer, refactor to not use this s := newTestServer(t, func(c *consul.Config) { c.SerfLANConfig.MemberlistConfig.AdvertiseAddr = "127.0.0.1" - c.TLSConfig.InternalRPC.CAFile = cafile + c.TLSConfig.GRPC.CAFile = cafile c.DataDir = dir }) client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) @@ -68,12 +78,12 @@ func TestPeeringService_GenerateToken(t *testing.T) { // TODO(peering): for more failure cases, consider using a table test // check meta tags - reqE := pbpeering.GenerateTokenRequest{PeerName: "peerB", Datacenter: "dc1", Meta: generateTooManyMetaKeys()} + reqE := pbpeering.GenerateTokenRequest{PeerName: "peerB", Meta: generateTooManyMetaKeys()} _, errE := client.GenerateToken(ctx, &reqE) require.EqualError(t, errE, "rpc error: code = Unknown desc = meta tags failed validation: Node metadata cannot contain more than 64 key/value pairs") // happy path - req := pbpeering.GenerateTokenRequest{PeerName: "peerB", Datacenter: "dc1", Meta: map[string]string{"foo": "bar"}} + req := pbpeering.GenerateTokenRequest{PeerName: "peerB", Meta: map[string]string{"foo": "bar"}} resp, err := client.GenerateToken(ctx, &req) require.NoError(t, err) @@ -108,6 +118,96 @@ func TestPeeringService_GenerateToken(t *testing.T) { require.Equal(t, expect, peers[0]) } +func TestPeeringService_GenerateTokenExternalAddress(t *testing.T) { + dir := testutil.TempDir(t, "consul") + signer, _, _ := tlsutil.GeneratePrivateKey() + ca, _, _ := tlsutil.GenerateCA(tlsutil.CAOpts{Signer: signer}) + cafile := path.Join(dir, "cacert.pem") + require.NoError(t, ioutil.WriteFile(cafile, []byte(ca), 0600)) + + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(c *consul.Config) { + c.SerfLANConfig.MemberlistConfig.AdvertiseAddr = "127.0.0.1" + c.TLSConfig.GRPC.CAFile = cafile + c.DataDir = dir + }) + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + externalAddress := "32.1.2.3:8502" + // happy path + req := pbpeering.GenerateTokenRequest{PeerName: "peerB", Meta: map[string]string{"foo": "bar"}, 
ServerExternalAddresses: []string{externalAddress}} + resp, err := client.GenerateToken(ctx, &req) + require.NoError(t, err) + + tokenJSON, err := base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + + token := &structs.PeeringToken{} + require.NoError(t, json.Unmarshal(tokenJSON, token)) + require.Equal(t, "server.dc1.consul", token.ServerName) + require.Len(t, token.ServerAddresses, 1) + require.Equal(t, externalAddress, token.ServerAddresses[0]) + require.Equal(t, []string{ca}, token.CA) +} + +func TestPeeringService_GenerateToken_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + upsertTestACLs(t, s.Server.FSM().State()) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.GenerateTokenRequest + token string + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + _, err := client.GenerateToken(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.GenerateTokenRequest{PeerName: "foo"}, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "read token lacks permissions", + req: &pbpeering.GenerateTokenRequest{ + PeerName: "foo", + }, + token: testTokenPeeringReadSecret, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "write token grants permission", + req: &pbpeering.GenerateTokenRequest{ + PeerName: "foo", + }, + token: testTokenPeeringWriteSecret, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + func TestPeeringService_Establish(t *testing.T) { validToken := peering.TestPeeringToken("83474a06-cca4-4ff4-99a4-4152929c8160") validTokenJSON, _ := json.Marshal(&validToken) @@ -214,6 +314,95 @@ func TestPeeringService_Establish(t *testing.T) { } } +// We define a valid peering by a peering that does not occur over the same server addresses +func TestPeeringService_Establish_validPeeringInPartition(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, nil) + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + req := pbpeering.GenerateTokenRequest{PeerName: "peerOne"} + resp, err := client.GenerateToken(ctx, &req) + require.NoError(t, err) + require.NotEmpty(t, resp) + + establishReq := &pbpeering.EstablishRequest{ + PeerName: "peerTwo", + PeeringToken: resp.PeeringToken} + + respE, errE := client.Establish(ctx, establishReq) + require.Error(t, errE) + require.Contains(t, errE.Error(), "cannot create a peering within the same partition (ENT) or cluster (OSS)") + require.Nil(t, respE) +} + +func TestPeeringService_Establish_ACLEnforcement(t *testing.T) { + validToken := peering.TestPeeringToken("83474a06-cca4-4ff4-99a4-4152929c8160") + validTokenJSON, _ := json.Marshal(&validToken) + validTokenB64 := base64.StdEncoding.EncodeToString(validTokenJSON) + + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf 
*consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + upsertTestACLs(t, s.Server.FSM().State()) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.EstablishRequest + token string + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + _, err := client.Establish(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.EstablishRequest{ + PeerName: "foo", + PeeringToken: validTokenB64, + }, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "read token lacks permissions", + req: &pbpeering.EstablishRequest{ + PeerName: "foo", + PeeringToken: validTokenB64, + }, + token: testTokenPeeringReadSecret, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "write token grants permission", + req: &pbpeering.EstablishRequest{ + PeerName: "foo", + PeeringToken: validTokenB64, + }, + token: testTokenPeeringWriteSecret, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + func TestPeeringService_Read(t *testing.T) { // TODO(peering): see note on newTestServer, refactor to not use this s := newTestServer(t, nil) @@ -273,6 +462,72 @@ func TestPeeringService_Read(t *testing.T) { } } +func TestPeeringService_Read_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + upsertTestACLs(t, s.Server.FSM().State()) + + // insert peering directly to state store + p := &pbpeering.Peering{ + ID: testUUID(t), + Name: "foo", + State: pbpeering.PeeringState_ESTABLISHING, + PeerCAPems: nil, + PeerServerName: "test", + PeerServerAddresses: []string{"addr1"}, + ImportedServiceCount: 0, + ExportedServiceCount: 0, + } + err := s.Server.FSM().State().PeeringWrite(10, p) + require.NoError(t, err) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.PeeringReadRequest + expect *pbpeering.PeeringReadResponse + token string + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + resp, err := client.PeeringRead(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + prototest.AssertDeepEqual(t, tc.expect, resp) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.PeeringReadRequest{Name: "foo"}, + expect: &pbpeering.PeeringReadResponse{Peering: p}, + expectErr: "lacks permission 'peering:read'", + }, + { + name: "read token grants permission", + req: &pbpeering.PeeringReadRequest{ + Name: "foo", + }, + expect: &pbpeering.PeeringReadResponse{Peering: p}, + token: testTokenPeeringReadSecret, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + func TestPeeringService_Delete(t *testing.T) { // TODO(peering): see note on newTestServer, refactor to not use this s := 
newTestServer(t, nil) @@ -308,6 +563,76 @@ func TestPeeringService_Delete(t *testing.T) { }) } +func TestPeeringService_Delete_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + upsertTestACLs(t, s.Server.FSM().State()) + + p := &pbpeering.Peering{ + ID: testUUID(t), + Name: "foo", + State: pbpeering.PeeringState_ESTABLISHING, + PeerCAPems: nil, + PeerServerName: "test", + PeerServerAddresses: []string{"addr1"}, + } + err := s.Server.FSM().State().PeeringWrite(10, p) + require.NoError(t, err) + require.Nil(t, p.DeletedAt) + require.True(t, p.IsActive()) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.PeeringDeleteRequest + token string + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + _, err = client.PeeringDelete(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.PeeringDeleteRequest{Name: "foo"}, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "read token lacks permissions", + req: &pbpeering.PeeringDeleteRequest{ + Name: "foo", + }, + token: testTokenPeeringReadSecret, + expectErr: "lacks permission 'peering:write'", + }, + { + name: "write token grants permission", + req: &pbpeering.PeeringDeleteRequest{ + Name: "foo", + }, + token: testTokenPeeringWriteSecret, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } + +} + func TestPeeringService_List(t *testing.T) { // TODO(peering): see note on newTestServer, refactor to not use this s := newTestServer(t, nil) @@ -352,6 +677,78 @@ func TestPeeringService_List(t *testing.T) { prototest.AssertDeepEqual(t, expect, resp) } +func TestPeeringService_List_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + upsertTestACLs(t, s.Server.FSM().State()) + + // insert peering directly to state store + foo := &pbpeering.Peering{ + ID: testUUID(t), + Name: "foo", + State: pbpeering.PeeringState_ESTABLISHING, + PeerCAPems: nil, + PeerServerName: "fooservername", + PeerServerAddresses: []string{"addr1"}, + ImportedServiceCount: 0, + ExportedServiceCount: 0, + } + require.NoError(t, s.Server.FSM().State().PeeringWrite(10, foo)) + bar := &pbpeering.Peering{ + ID: testUUID(t), + Name: "bar", + State: pbpeering.PeeringState_ACTIVE, + PeerCAPems: nil, + PeerServerName: "barservername", + PeerServerAddresses: []string{"addr1"}, + ImportedServiceCount: 0, + ExportedServiceCount: 0, + } + require.NoError(t, s.Server.FSM().State().PeeringWrite(15, bar)) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + token string + expect *pbpeering.PeeringListResponse + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + resp, err := 
client.PeeringList(external.ContextWithToken(ctx, tc.token), &pbpeering.PeeringListRequest{}) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + prototest.AssertDeepEqual(t, tc.expect, resp) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + expectErr: "lacks permission 'peering:read'", + }, + { + name: "read token grants permission", + token: testTokenPeeringReadSecret, + expect: &pbpeering.PeeringListResponse{ + Peerings: []*pbpeering.Peering{bar, foo}, + }, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + func TestPeeringService_TrustBundleRead(t *testing.T) { srv := newTestServer(t, nil) store := srv.Server.FSM().State() @@ -360,25 +757,6 @@ func TestPeeringService_TrustBundleRead(t *testing.T) { var lastIdx uint64 = 1 _ = setupTestPeering(t, store, "my-peering", lastIdx) - mysql := &structs.CheckServiceNode{ - Node: &structs.Node{ - Node: "node1", - Address: "10.0.0.1", - PeerName: "my-peering", - }, - Service: &structs.NodeService{ - ID: "mysql-1", - Service: "mysql", - Port: 5000, - PeerName: "my-peering", - }, - } - - lastIdx++ - require.NoError(t, store.EnsureNode(lastIdx, mysql.Node)) - lastIdx++ - require.NoError(t, store.EnsureService(lastIdx, mysql.Node.Node, mysql.Service)) - bundle := &pbpeering.PeeringTrustBundle{ TrustDomain: "peer1.com", PeerName: "my-peering", @@ -399,6 +777,76 @@ func TestPeeringService_TrustBundleRead(t *testing.T) { prototest.AssertDeepEqual(t, bundle, resp.Bundle) } +func TestPeeringService_TrustBundleRead_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + store := s.Server.FSM().State() + upsertTestACLs(t, s.Server.FSM().State()) + + // Insert peering and trust bundle directly to state store. 
+ _ = setupTestPeering(t, store, "my-peering", 10) + + bundle := &pbpeering.PeeringTrustBundle{ + TrustDomain: "peer1.com", + PeerName: "my-peering", + RootPEMs: []string{"peer1-root-1"}, + } + require.NoError(t, store.PeeringTrustBundleWrite(11, bundle)) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.TrustBundleReadRequest + token string + expect *pbpeering.PeeringTrustBundle + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + resp, err := client.TrustBundleRead(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + prototest.AssertDeepEqual(t, tc.expect, resp.Bundle) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.TrustBundleReadRequest{Name: "foo"}, + expectErr: "lacks permission 'service:write'", + }, + { + name: "service read token lacks permissions", + req: &pbpeering.TrustBundleReadRequest{ + Name: "my-peering", + }, + token: testTokenServiceReadSecret, + expectErr: "lacks permission 'service:write'", + }, + { + name: "with service write token", + req: &pbpeering.TrustBundleReadRequest{ + Name: "my-peering", + }, + token: testTokenServiceWriteSecret, + expect: bundle, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + // Setup: // - Peerings "foo" and "bar" with trust bundles saved // - "api" service exported to both "foo" and "bar" @@ -495,6 +943,252 @@ func TestPeeringService_TrustBundleListByService(t *testing.T) { require.Equal(t, []string{"foo-root-1"}, resp.Bundles[1].RootPEMs) } +func TestPeeringService_validatePeer(t *testing.T) { + dir := testutil.TempDir(t, "consul") + signer, _, _ := tlsutil.GeneratePrivateKey() + ca, _, _ := tlsutil.GenerateCA(tlsutil.CAOpts{Signer: signer}) + cafile := path.Join(dir, "cacert.pem") + require.NoError(t, ioutil.WriteFile(cafile, []byte(ca), 0600)) + + s := newTestServer(t, func(c *consul.Config) { + c.SerfLANConfig.MemberlistConfig.AdvertiseAddr = "127.0.0.1" + c.TLSConfig.GRPC.CAFile = cafile + c.DataDir = dir + }) + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + testutil.RunStep(t, "generate a token", func(t *testing.T) { + req := pbpeering.GenerateTokenRequest{PeerName: "peerB"} + resp, err := client.GenerateToken(ctx, &req) + require.NoError(t, err) + require.NotEmpty(t, resp) + }) + + testutil.RunStep(t, "generate a token with the same name", func(t *testing.T) { + req := pbpeering.GenerateTokenRequest{PeerName: "peerB"} + resp, err := client.GenerateToken(ctx, &req) + require.NoError(t, err) + require.NotEmpty(t, resp) + }) + + validToken := peering.TestPeeringToken("83474a06-cca4-4ff4-99a4-4152929c8160") + validTokenJSON, _ := json.Marshal(&validToken) + validTokenB64 := base64.StdEncoding.EncodeToString(validTokenJSON) + + testutil.RunStep(t, "send an establish request for a different peer name", func(t *testing.T) { + resp, err := client.Establish(ctx, &pbpeering.EstablishRequest{ + PeerName: "peer1-usw1", + PeeringToken: validTokenB64, + }) + require.NoError(t, err) + require.NotEmpty(t, resp) + }) + + testutil.RunStep(t, "send an establish request for a different peer name again", func(t *testing.T) { + resp, err := 
client.Establish(ctx, &pbpeering.EstablishRequest{ + PeerName: "peer1-usw1", + PeeringToken: validTokenB64, + }) + require.NoError(t, err) + require.NotEmpty(t, resp) + }) + + testutil.RunStep(t, "attempt to generate token with the same name used as dialer", func(t *testing.T) { + req := pbpeering.GenerateTokenRequest{PeerName: "peer1-usw1"} + resp, err := client.GenerateToken(ctx, &req) + + require.Error(t, err) + require.Contains(t, err.Error(), + "cannot create peering with name: \"peer1-usw1\"; there is already an established peering") + require.Nil(t, resp) + }) + + testutil.RunStep(t, "attempt to establish the with the same name used as acceptor", func(t *testing.T) { + resp, err := client.Establish(ctx, &pbpeering.EstablishRequest{ + PeerName: "peerB", + PeeringToken: validTokenB64, + }) + + require.Error(t, err) + require.Contains(t, err.Error(), + "cannot create peering with name: \"peerB\"; there is an existing peering expecting to be dialed") + require.Nil(t, resp) + }) +} + +// Test RPC endpoint responses when peering is disabled. They should all return an error. +func TestPeeringService_PeeringDisabled(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(c *consul.Config) { c.PeeringEnabled = false }) + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + t.Cleanup(cancel) + + // assertFailedResponse is a helper function that checks the error from a gRPC + // response is what we expect when peering is disabled. + assertFailedResponse := func(t *testing.T, err error) { + actErr, ok := grpcstatus.FromError(err) + require.True(t, ok) + require.Equal(t, codes.FailedPrecondition, actErr.Code()) + require.Equal(t, "peering must be enabled to use this endpoint", actErr.Message()) + } + + // Test all the endpoints. 
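The `assertFailedResponse` helper above pins both the gRPC status code and the message, so every peering RPC is expected to fail identically before any request-specific work happens. A minimal sketch of a guard that would produce exactly that status is shown here; `requirePeeringEnabled` is a hypothetical helper for illustration, not the server's actual implementation.

```go
package main

import (
	"fmt"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// peeringDisabledErr mirrors the status asserted by assertFailedResponse.
var peeringDisabledErr = status.Error(codes.FailedPrecondition,
	"peering must be enabled to use this endpoint")

// requirePeeringEnabled is an illustrative guard each peering RPC handler
// could call before doing any work; it is not the code added by this PR.
func requirePeeringEnabled(enabled bool) error {
	if !enabled {
		return peeringDisabledErr
	}
	return nil
}

func main() {
	err := requirePeeringEnabled(false)
	st, ok := status.FromError(err)
	fmt.Println(ok, st.Code(), st.Message())
	// Prints: true FailedPrecondition peering must be enabled to use this endpoint
}
```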
+ + t.Run("PeeringWrite", func(t *testing.T) { + _, err := client.PeeringWrite(ctx, &pbpeering.PeeringWriteRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("PeeringRead", func(t *testing.T) { + _, err := client.PeeringRead(ctx, &pbpeering.PeeringReadRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("PeeringDelete", func(t *testing.T) { + _, err := client.PeeringDelete(ctx, &pbpeering.PeeringDeleteRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("PeeringList", func(t *testing.T) { + _, err := client.PeeringList(ctx, &pbpeering.PeeringListRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("Establish", func(t *testing.T) { + _, err := client.Establish(ctx, &pbpeering.EstablishRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("GenerateToken", func(t *testing.T) { + _, err := client.GenerateToken(ctx, &pbpeering.GenerateTokenRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("TrustBundleRead", func(t *testing.T) { + _, err := client.TrustBundleRead(ctx, &pbpeering.TrustBundleReadRequest{}) + assertFailedResponse(t, err) + }) + + t.Run("TrustBundleListByService", func(t *testing.T) { + _, err := client.TrustBundleListByService(ctx, &pbpeering.TrustBundleListByServiceRequest{}) + assertFailedResponse(t, err) + }) +} + +func TestPeeringService_TrustBundleListByService_ACLEnforcement(t *testing.T) { + // TODO(peering): see note on newTestServer, refactor to not use this + s := newTestServer(t, func(conf *consul.Config) { + conf.ACLsEnabled = true + conf.ACLResolverSettings.ACLDefaultPolicy = acl.PolicyDeny + }) + store := s.Server.FSM().State() + upsertTestACLs(t, s.Server.FSM().State()) + + var lastIdx uint64 = 10 + + lastIdx++ + require.NoError(t, s.Server.FSM().State().PeeringWrite(lastIdx, &pbpeering.Peering{ + ID: testUUID(t), + Name: "foo", + State: pbpeering.PeeringState_ESTABLISHING, + PeerServerName: "test", + PeerServerAddresses: []string{"addr1"}, + })) + + lastIdx++ + require.NoError(t, store.PeeringTrustBundleWrite(lastIdx, &pbpeering.PeeringTrustBundle{ + TrustDomain: "foo.com", + PeerName: "foo", + RootPEMs: []string{"foo-root-1"}, + })) + + lastIdx++ + require.NoError(t, store.EnsureNode(lastIdx, &structs.Node{ + Node: "my-node", Address: "127.0.0.1", + })) + + lastIdx++ + require.NoError(t, store.EnsureService(lastIdx, "my-node", &structs.NodeService{ + ID: "api", + Service: "api", + Port: 8000, + })) + + entry := structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{ + { + Name: "api", + Consumers: []structs.ServiceConsumer{ + { + PeerName: "foo", + }, + }, + }, + }, + } + require.NoError(t, entry.Normalize()) + require.NoError(t, entry.Validate()) + + lastIdx++ + require.NoError(t, store.EnsureConfigEntry(lastIdx, &entry)) + + client := pbpeering.NewPeeringServiceClient(s.ClientConn(t)) + + type testcase struct { + name string + req *pbpeering.TrustBundleListByServiceRequest + token string + expect []string + expectErr string + } + run := func(t *testing.T, tc testcase) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + resp, err := client.TrustBundleListByService(external.ContextWithToken(ctx, tc.token), tc.req) + if tc.expectErr != "" { + require.Contains(t, err.Error(), tc.expectErr) + return + } + require.NoError(t, err) + require.Len(t, resp.Bundles, 1) + require.Equal(t, tc.expect, resp.Bundles[0].RootPEMs) + } + tcs := []testcase{ + { + name: "anonymous token lacks permissions", + req: &pbpeering.TrustBundleListByServiceRequest{ServiceName: 
"api"}, + expectErr: "lacks permission 'service:write'", + }, + { + name: "service read token lacks permission", + req: &pbpeering.TrustBundleListByServiceRequest{ + ServiceName: "api", + }, + token: testTokenServiceReadSecret, + expectErr: "lacks permission 'service:write'", + }, + { + name: "with service write token", + req: &pbpeering.TrustBundleListByServiceRequest{ + ServiceName: "api", + }, + token: testTokenServiceWriteSecret, + expect: []string{"foo-root-1"}, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + run(t, tc) + }) + } +} + // newTestServer is copied from partition/service_test.go, with the addition of certs/cas. // TODO(peering): these are endpoint tests and should live in the agent/consul // package. Instead, these can be written around a mock client (see testing.go) @@ -659,6 +1353,87 @@ func newDefaultDeps(t *testing.T, c *consul.Config) consul.Deps { } } +func upsertTestACLs(t *testing.T, store *state.Store) { + var ( + testPolicyPeeringReadID = "43fed171-ad1d-4d3b-9df3-c99c1c835c37" + testPolicyPeeringWriteID = "cddb0821-e720-4411-bbdd-cc62ce417eac" + + testPolicyServiceReadID = "0e054136-f5d3-4627-a7e6-198f1df923d3" + testPolicyServiceWriteID = "b55e03f4-c9dd-4210-8d24-f7ea8e2a1918" + ) + policies := structs.ACLPolicies{ + { + ID: testPolicyPeeringReadID, + Name: "peering-read", + Rules: `peering = "read"`, + Syntax: acl.SyntaxCurrent, + }, + { + ID: testPolicyPeeringWriteID, + Name: "peering-write", + Rules: `peering = "write"`, + Syntax: acl.SyntaxCurrent, + }, + { + ID: testPolicyServiceReadID, + Name: "service-read", + Rules: `service "api" { policy = "read" }`, + Syntax: acl.SyntaxCurrent, + }, + { + ID: testPolicyServiceWriteID, + Name: "service-write", + Rules: `service "api" { policy = "write" }`, + Syntax: acl.SyntaxCurrent, + }, + } + require.NoError(t, store.ACLPolicyBatchSet(100, policies)) + + tokens := structs.ACLTokens{ + &structs.ACLToken{ + AccessorID: "22500c91-723c-4335-be8a-6697417dc35b", + SecretID: testTokenPeeringReadSecret, + Description: "peering read", + Policies: []structs.ACLTokenPolicyLink{ + { + ID: testPolicyPeeringReadID, + }, + }, + }, + &structs.ACLToken{ + AccessorID: "de924f93-cfec-404c-9a7e-c1c9b96b8cae", + SecretID: testTokenPeeringWriteSecret, + Description: "peering write", + Policies: []structs.ACLTokenPolicyLink{ + { + ID: testPolicyPeeringWriteID, + }, + }, + }, + &structs.ACLToken{ + AccessorID: "53c54f79-ffed-47d4-904e-e2e0e40c0a01", + SecretID: testTokenServiceReadSecret, + Description: "service read", + Policies: []structs.ACLTokenPolicyLink{ + { + ID: testPolicyServiceReadID, + }, + }, + }, + &structs.ACLToken{ + AccessorID: "a100fa5f-db72-49f0-8f61-aa1f9f92f657", + SecretID: testTokenServiceWriteSecret, + Description: "service write", + Policies: []structs.ACLTokenPolicyLink{ + { + ID: testPolicyServiceWriteID, + }, + }, + }, + } + require.NoError(t, store.ACLTokenBatchSet(101, tokens, state.ACLTokenSetOptions{})) +} + func setupTestPeering(t *testing.T, store *state.Store, name string, index uint64) string { t.Helper() err := store.PeeringWrite(index, &pbpeering.Peering{ diff --git a/agent/rpc/peering/testing.go b/agent/rpc/peering/testing.go index de64dda7a..577f78229 100644 --- a/agent/rpc/peering/testing.go +++ b/agent/rpc/peering/testing.go @@ -1,6 +1,7 @@ package peering import ( + "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/proto/pbpeering" ) @@ -31,6 +32,7 @@ not valid ` var validAddress = "1.2.3.4:80" +var 
validHostnameAddress = "foo.bar.baz:80" var validServerName = "server.consul" @@ -53,6 +55,7 @@ func TestPeering(peerName string, state pbpeering.PeeringState, meta map[string] State: state, PeerID: validPeerID, Meta: meta, + Partition: acl.DefaultPartitionName, } } diff --git a/agent/rpc/peering/validate.go b/agent/rpc/peering/validate.go index 32a3d5d29..340e4c5ad 100644 --- a/agent/rpc/peering/validate.go +++ b/agent/rpc/peering/validate.go @@ -3,7 +3,6 @@ package peering import ( "fmt" "net" - "net/netip" "strconv" "github.com/hashicorp/consul/agent/connect" @@ -25,7 +24,7 @@ func validatePeeringToken(tok *structs.PeeringToken) error { return errPeeringTokenEmptyServerAddresses } for _, addr := range tok.ServerAddresses { - host, portRaw, err := net.SplitHostPort(addr) + _, portRaw, err := net.SplitHostPort(addr) if err != nil { return &errPeeringInvalidServerAddress{addr} } @@ -37,9 +36,6 @@ func validatePeeringToken(tok *structs.PeeringToken) error { if port < 1 || port > 65535 { return &errPeeringInvalidServerAddress{addr} } - if _, err := netip.ParseAddr(host); err != nil { - return &errPeeringInvalidServerAddress{addr} - } } // TODO(peering): validate name matches SNI? diff --git a/agent/rpc/peering/validate_test.go b/agent/rpc/peering/validate_test.go index 1f0660c8f..06e893a65 100644 --- a/agent/rpc/peering/validate_test.go +++ b/agent/rpc/peering/validate_test.go @@ -53,16 +53,6 @@ func TestValidatePeeringToken(t *testing.T) { "1.2.3.4", }, }, - { - name: "invalid address IP", - token: &structs.PeeringToken{ - CA: []string{validCA}, - ServerAddresses: []string{"foo.bar.baz"}, - }, - wantErr: &errPeeringInvalidServerAddress{ - "foo.bar.baz", - }, - }, { name: "invalid server name", token: &structs.PeeringToken{ @@ -89,6 +79,15 @@ func TestValidatePeeringToken(t *testing.T) { PeerID: validPeerID, }, }, + { + name: "valid token with hostname address", + token: &structs.PeeringToken{ + CA: []string{validCA}, + ServerAddresses: []string{validHostnameAddress}, + ServerName: validServerName, + PeerID: validPeerID, + }, + }, } for _, tc := range tt { diff --git a/agent/rpcclient/health/health.go b/agent/rpcclient/health/health.go index dd4be64ce..a4bdae78a 100644 --- a/agent/rpcclient/health/health.go +++ b/agent/rpcclient/health/health.go @@ -136,14 +136,14 @@ func (r serviceRequest) Type() string { } func (r serviceRequest) NewMaterializer() (submatview.Materializer, error) { - view, err := newHealthView(r.ServiceSpecificRequest) + view, err := NewHealthView(r.ServiceSpecificRequest) if err != nil { return nil, err } deps := submatview.Deps{ View: view, Logger: r.deps.Logger, - Request: newMaterializerRequest(r.ServiceSpecificRequest), + Request: NewMaterializerRequest(r.ServiceSpecificRequest), } return submatview.NewRPCMaterializer(pbsubscribe.NewStateChangeSubscriptionClient(r.deps.Conn), deps), nil diff --git a/agent/rpcclient/health/view.go b/agent/rpcclient/health/view.go index fa591b7b7..fd19cb4a0 100644 --- a/agent/rpcclient/health/view.go +++ b/agent/rpcclient/health/view.go @@ -21,7 +21,7 @@ type MaterializerDeps struct { Logger hclog.Logger } -func newMaterializerRequest(srvReq structs.ServiceSpecificRequest) func(index uint64) *pbsubscribe.SubscribeRequest { +func NewMaterializerRequest(srvReq structs.ServiceSpecificRequest) func(index uint64) *pbsubscribe.SubscribeRequest { return func(index uint64) *pbsubscribe.SubscribeRequest { req := &pbsubscribe.SubscribeRequest{ Topic: pbsubscribe.Topic_ServiceHealth, @@ -44,29 +44,29 @@ func newMaterializerRequest(srvReq 
structs.ServiceSpecificRequest) func(index ui } } -func newHealthView(req structs.ServiceSpecificRequest) (*healthView, error) { +func NewHealthView(req structs.ServiceSpecificRequest) (*HealthView, error) { fe, err := newFilterEvaluator(req) if err != nil { return nil, err } - return &healthView{ + return &HealthView{ state: make(map[string]structs.CheckServiceNode), filter: fe, }, nil } -// healthView implements submatview.View for storing the view state +// HealthView implements submatview.View for storing the view state // of a service health result. We store it as a map to make updates and // deletions a little easier but we could just store a result type // (IndexedCheckServiceNodes) and update it in place for each event - that // involves re-sorting each time etc. though. -type healthView struct { +type HealthView struct { state map[string]structs.CheckServiceNode filter filterEvaluator } // Update implements View -func (s *healthView) Update(events []*pbsubscribe.Event) error { +func (s *HealthView) Update(events []*pbsubscribe.Event) error { for _, event := range events { serviceHealth := event.GetServiceHealth() if serviceHealth == nil { @@ -181,7 +181,7 @@ func sortCheckServiceNodes(serviceNodes *structs.IndexedCheckServiceNodes) { } // Result returns the structs.IndexedCheckServiceNodes stored by this view. -func (s *healthView) Result(index uint64) interface{} { +func (s *HealthView) Result(index uint64) interface{} { result := structs.IndexedCheckServiceNodes{ Nodes: make(structs.CheckServiceNodes, 0, len(s.state)), QueryMeta: structs.QueryMeta{ @@ -197,7 +197,7 @@ func (s *healthView) Result(index uint64) interface{} { return &result } -func (s *healthView) Reset() { +func (s *HealthView) Reset() { s.state = make(map[string]structs.CheckServiceNode) } diff --git a/agent/rpcclient/health/view_test.go b/agent/rpcclient/health/view_test.go index ddc2afc1a..8fcb50da3 100644 --- a/agent/rpcclient/health/view_test.go +++ b/agent/rpcclient/health/view_test.go @@ -602,14 +602,14 @@ type serviceRequestStub struct { } func (r serviceRequestStub) NewMaterializer() (submatview.Materializer, error) { - view, err := newHealthView(r.ServiceSpecificRequest) + view, err := NewHealthView(r.ServiceSpecificRequest) if err != nil { return nil, err } deps := submatview.Deps{ View: view, Logger: hclog.New(nil), - Request: newMaterializerRequest(r.ServiceSpecificRequest), + Request: NewMaterializerRequest(r.ServiceSpecificRequest), } return submatview.NewRPCMaterializer(r.streamClient, deps), nil } diff --git a/agent/setup.go b/agent/setup.go index 9ac506ab6..c32960518 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -231,7 +231,8 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau if isServer { gauges = append(gauges, consul.AutopilotGauges, - consul.LeaderCertExpirationGauges) + consul.LeaderCertExpirationGauges, + consul.LeaderPeeringMetrics) } // Flatten definitions diff --git a/agent/structs/acl.go b/agent/structs/acl.go index 82d19b8ac..1fd3f1d93 100644 --- a/agent/structs/acl.go +++ b/agent/structs/acl.go @@ -60,6 +60,7 @@ node_prefix "" { } operator = "write" mesh = "write" +peering = "write" query_prefix "" { policy = "write" } diff --git a/agent/structs/config_entry.go b/agent/structs/config_entry.go index 05d7480cb..8b3b0a8d2 100644 --- a/agent/structs/config_entry.go +++ b/agent/structs/config_entry.go @@ -3,6 +3,7 @@ package structs import ( "errors" "fmt" + "github.com/miekg/dns" "net" "strconv" "strings" @@ -201,8 +202,21 @@ func (e 
*ServiceConfigEntry) Validate() error { } if e.Destination != nil { - if err := validateEndpointAddress(e.Destination.Address); err != nil { - validationErr = multierror.Append(validationErr, fmt.Errorf("Destination address is invalid %w", err)) + if e.Destination.Addresses == nil || len(e.Destination.Addresses) == 0 { + validationErr = multierror.Append(validationErr, errors.New("Destination must contain at least one valid address")) + } + + seen := make(map[string]bool, len(e.Destination.Addresses)) + for _, address := range e.Destination.Addresses { + if _, ok := seen[address]; ok { + validationErr = multierror.Append(validationErr, fmt.Errorf("Duplicate address '%s' is not allowed", address)) + continue + } + seen[address] = true + + if err := validateEndpointAddress(address); err != nil { + validationErr = multierror.Append(validationErr, fmt.Errorf("Destination address '%s' is invalid %w", address, err)) + } } if e.Destination.Port < 1 || e.Destination.Port > 65535 { @@ -219,15 +233,12 @@ func validateEndpointAddress(address string) error { ip := net.ParseIP(address) valid = ip != nil - _, _, err := net.ParseCIDR(address) - valid = valid || err == nil - - // Since we don't know if this will be a TLS connection, setting tlsEnabled to false will be more permissive with wildcards - err = validateHost(false, address) - valid = valid || err == nil + hasWildcard := strings.Contains(address, "*") + _, ok := dns.IsDomainName(address) + valid = valid || (ok && !hasWildcard) if !valid { - return fmt.Errorf("Could not validate address %s as an IP, CIDR block or Hostname", address) + return fmt.Errorf("Could not validate address %s as an IP or Hostname", address) } return nil } @@ -294,20 +305,20 @@ func (c *UpstreamConfiguration) Clone() *UpstreamConfiguration { // DestinationConfig represents a virtual service, i.e. 
one that is external to Consul type DestinationConfig struct { - // Address of the endpoint; hostname, IP, or CIDR - Address string `json:",omitempty"` + // Addresses of the endpoint; hostname or IP + Addresses []string `json:",omitempty"` // Port allowed within this endpoint Port int `json:",omitempty"` } -func (d *DestinationConfig) HasHostname() bool { - ip := net.ParseIP(d.Address) +func IsHostname(address string) bool { + ip := net.ParseIP(address) return ip == nil } -func (d *DestinationConfig) HasIP() bool { - ip := net.ParseIP(d.Address) +func IsIP(address string) bool { + ip := net.ParseIP(address) return ip != nil } diff --git a/agent/structs/config_entry_test.go b/agent/structs/config_entry_test.go index afbd737f8..c3f5c7a98 100644 --- a/agent/structs/config_entry_test.go +++ b/agent/structs/config_entry_test.go @@ -434,7 +434,10 @@ func TestDecodeConfigEntry(t *testing.T) { name = "external" protocol = "tcp" destination { - address = "1.2.3.4/24" + addresses = [ + "api.google.com", + "web.google.com" + ] port = 8080 } `, @@ -443,7 +446,10 @@ func TestDecodeConfigEntry(t *testing.T) { Name = "external" Protocol = "tcp" Destination { - Address = "1.2.3.4/24" + Addresses = [ + "api.google.com", + "web.google.com" + ] Port = 8080 } `, @@ -452,8 +458,11 @@ func TestDecodeConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "1.2.3.4/24", - Port: 8080, + Addresses: []string{ + "api.google.com", + "web.google.com", + }, + Port: 8080, }, }, }, @@ -2421,17 +2430,29 @@ func TestServiceConfigEntry(t *testing.T) { EnterpriseMeta: *DefaultEnterpriseMetaInDefaultPartition(), }, }, - "validate: missing destination address": { + "validate: nil destination address": { entry: &ServiceConfigEntry{ Kind: ServiceDefaults, Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "", - Port: 443, + Addresses: nil, + Port: 443, }, }, - validateErr: "Could not validate address", + validateErr: "must contain at least one valid address", + }, + "validate: empty destination address": { + entry: &ServiceConfigEntry{ + Kind: ServiceDefaults, + Name: "external", + Protocol: "tcp", + Destination: &DestinationConfig{ + Addresses: []string{}, + Port: 443, + }, + }, + validateErr: "must contain at least one valid address", }, "validate: destination ipv4 address": { entry: &ServiceConfigEntry{ @@ -2439,19 +2460,8 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "1.2.3.4", - Port: 443, - }, - }, - }, - "validate: destination ipv4 CIDR address": { - entry: &ServiceConfigEntry{ - Kind: ServiceDefaults, - Name: "external", - Protocol: "tcp", - Destination: &DestinationConfig{ - Address: "10.0.0.1/16", - Port: 8080, + Addresses: []string{"1.2.3.4"}, + Port: 443, }, }, }, @@ -2461,8 +2471,8 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "2001:0db8:0000:8a2e:0370:7334:1234:5678", - Port: 443, + Addresses: []string{"2001:0db8:0000:8a2e:0370:7334:1234:5678"}, + Port: 443, }, }, }, @@ -2472,19 +2482,8 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "2001:db8::8a2e:370:7334", - Port: 443, - }, - }, - }, - "validate: destination ipv6 CIDR address": { - entry: &ServiceConfigEntry{ - Kind: ServiceDefaults, - Name: "external", - Protocol: "tcp", - Destination: &DestinationConfig{ - Address: "2001:db8::8a2e:370:7334/64", - 
Port: 443, + Addresses: []string{"2001:db8::8a2e:370:7334"}, + Port: 443, }, }, }, @@ -2494,7 +2493,7 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "2001:db8::8a2e:370:7334/64", + Addresses: []string{"2001:db8::8a2e:370:7334"}, }, }, validateErr: "Invalid Port number", @@ -2505,8 +2504,8 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "*external.com", - Port: 443, + Addresses: []string{"*external.com"}, + Port: 443, }, }, validateErr: "Could not validate address", @@ -2517,8 +2516,8 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "tcp", Destination: &DestinationConfig{ - Address: "..hello.", - Port: 443, + Addresses: []string{"..hello."}, + Port: 443, }, }, validateErr: "Could not validate address", @@ -2529,10 +2528,40 @@ func TestServiceConfigEntry(t *testing.T) { Name: "external", Protocol: "http", Destination: &DestinationConfig{ - Address: "*", - Port: 443, + Addresses: []string{"*"}, + Port: 443, }, }, + validateErr: "Could not validate address", + }, + "validate: multiple hostnames": { + entry: &ServiceConfigEntry{ + Kind: ServiceDefaults, + Name: "external", + Protocol: "http", + Destination: &DestinationConfig{ + Addresses: []string{ + "api.google.com", + "web.google.com", + }, + Port: 443, + }, + }, + }, + "validate: duplicate addresses not allowed": { + entry: &ServiceConfigEntry{ + Kind: ServiceDefaults, + Name: "external", + Protocol: "http", + Destination: &DestinationConfig{ + Addresses: []string{ + "api.google.com", + "api.google.com", + }, + Port: 443, + }, + }, + validateErr: "Duplicate address", }, } testConfigEntryNormalizeAndValidate(t, cases) diff --git a/agent/structs/prepared_query.go b/agent/structs/prepared_query.go index 440053f0b..cd8ec574b 100644 --- a/agent/structs/prepared_query.go +++ b/agent/structs/prepared_query.go @@ -10,9 +10,9 @@ import ( "github.com/hashicorp/consul/types" ) -// QueryDatacenterOptions sets options about how we fail over if there are no +// QueryFailoverOptions sets options about how we fail over if there are no // healthy nodes in the local datacenter. -type QueryDatacenterOptions struct { +type QueryFailoverOptions struct { // NearestN is set to the number of remote datacenters to try, based on // network coordinates. NearestN int @@ -21,6 +21,32 @@ type QueryDatacenterOptions struct { // never try a datacenter multiple times, so those are subtracted from // this list before proceeding. Datacenters []string + + // Targets is a fixed list of datacenters and peers to try. This field cannot + // be populated with NearestN or Datacenters. + Targets []QueryFailoverTarget +} + +// AsTargets either returns Targets as is or Datacenters converted into +// Targets. +func (f *QueryFailoverOptions) AsTargets() []QueryFailoverTarget { + if dcs := f.Datacenters; len(dcs) > 0 { + var targets []QueryFailoverTarget + for _, dc := range dcs { + targets = append(targets, QueryFailoverTarget{Datacenter: dc}) + } + return targets + } + + return f.Targets +} + +type QueryFailoverTarget struct { + // PeerName specifies a peer to try during failover. + PeerName string + + // Datacenter specifies a datacenter to try during failover. + Datacenter string } // QueryDNSOptions controls settings when query results are served over DNS. @@ -37,7 +63,7 @@ type ServiceQuery struct { // Failover controls what we do if there are no healthy nodes in the // local datacenter. 
- Failover QueryDatacenterOptions + Failover QueryFailoverOptions // If OnlyPassing is true then we will only include nodes with passing // health checks (critical AND warning checks will cause a node to be @@ -323,6 +349,9 @@ type PreparedQueryExecuteResponse struct { // Datacenter is the datacenter that these results came from. Datacenter string + // PeerName specifies the cluster peer that these results came from. + PeerName string + // Failovers is a count of how many times we had to query a remote // datacenter. Failovers int diff --git a/agent/structs/structs.go b/agent/structs/structs.go index 275bf4c18..afc913385 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -88,7 +88,7 @@ const ( const ( // LocalPeerKeyword is a reserved keyword used for indexing in the state store for objects in the local peer. - LocalPeerKeyword = "internal" + LocalPeerKeyword = "~" // DefaultPeerKeyword is the PeerName to use to refer to the local // cluster's own data, rather than replicated peered data. diff --git a/agent/uiserver/ui_template_data.go b/agent/uiserver/ui_template_data.go index 97dcbf51d..e28fc97fe 100644 --- a/agent/uiserver/ui_template_data.go +++ b/agent/uiserver/ui_template_data.go @@ -35,6 +35,7 @@ func uiTemplateDataFromConfig(cfg *config.RuntimeConfig) (map[string]interface{} "UIConfig": uiCfg, "LocalDatacenter": cfg.Datacenter, "PrimaryDatacenter": cfg.PrimaryDatacenter, + "PeeringEnabled": cfg.PeeringEnabled, } // Also inject additional provider scripts if needed, otherwise strip the diff --git a/agent/uiserver/uiserver_test.go b/agent/uiserver/uiserver_test.go index 97835f2b0..47110da5a 100644 --- a/agent/uiserver/uiserver_test.go +++ b/agent/uiserver/uiserver_test.go @@ -43,6 +43,7 @@ func TestUIServerIndex(t *testing.T) { "LocalDatacenter": "dc1", "PrimaryDatacenter": "dc1", "ContentPath": "/ui/", + "PeeringEnabled": true, "UIConfig": { "hcp_enabled": false, "metrics_provider": "", @@ -78,6 +79,7 @@ func TestUIServerIndex(t *testing.T) { "LocalDatacenter": "dc1", "PrimaryDatacenter": "dc1", "ContentPath": "/ui/", + "PeeringEnabled": true, "UIConfig": { "hcp_enabled": false, "metrics_provider": "foo", @@ -101,6 +103,7 @@ func TestUIServerIndex(t *testing.T) { "LocalDatacenter": "dc1", "PrimaryDatacenter": "dc1", "ContentPath": "/ui/", + "PeeringEnabled": true, "UIConfig": { "hcp_enabled": false, "metrics_provider": "", @@ -121,6 +124,7 @@ func TestUIServerIndex(t *testing.T) { "LocalDatacenter": "dc1", "PrimaryDatacenter": "dc1", "ContentPath": "/ui/", + "PeeringEnabled": true, "UIConfig": { "hcp_enabled": true, "metrics_provider": "", @@ -129,6 +133,29 @@ func TestUIServerIndex(t *testing.T) { } }`, }, + { + name: "peering disabled", + cfg: basicUIEnabledConfig( + withPeeringDisabled(), + ), + path: "/", + wantStatus: http.StatusOK, + wantContains: []string{" Requires consul-helm v0.32.1 or higher. +This page describes deploying a single Consul datacenter in multiple Kubernetes clusters, +with servers and clients running in one cluster and only clients in the rest of the clusters. +This example uses two Kubernetes clusters, but this approach could be extended to using more than two. -This page describes how to deploy a single Consul datacenter in multiple Kubernetes clusters, -with both servers and clients running in one cluster, and only clients running in the rest of the clusters. -In this example, we will use two Kubernetes clusters, but this approach could be extended to using more than two. 
+## Requirements + +* Consul-Helm version `v0.32.1` or higher +* This deployment topology requires that the Kubernetes clusters have a flat network +for both pods and nodes so that pods or nodes from one cluster can connect +to pods or nodes in another. In many hosted Kubernetes environments, this may have to be explicitly configured based on the hosting provider's network. Refer to the following documentation for instructions: + * [Azure AKS CNI](https://docs.microsoft.com/en-us/azure/aks/concepts-network#azure-cni-advanced-networking) + * [AWS EKS CNI](https://docs.aws.amazon.com/eks/latest/userguide/pod-networking.html) + * [GKE VPC-native clusters](https://cloud.google.com/kubernetes-engine/docs/concepts/alias-ips). + +If a flat network is unavailable across all Kubernetes clusters, follow the instructions for using [Admin Partitions](/docs/enterprise/admin-partitions), which is a Consul Enterprise feature. -~> **Note:** This deployment topology requires that your Kubernetes clusters have a flat network -for both pods and nodes, so that pods or nodes from one cluster can connect -to pods or nodes in another. If a flat network is not available across all Kubernetes clusters, follow the instructions for using [Admin Partitions](/docs/enterprise/admin-partitions), which is a Consul Enterprise feature. ## Prepare Helm release name ahead of installs @@ -23,7 +30,7 @@ The Helm chart uses the Helm release name as a prefix for the ACL resources that it creates, such as tokens and auth methods. If the names of the Helm releases are identical, subsequent Consul on Kubernetes clusters overwrite existing ACL resources and cause the clusters to fail. -Before you proceed with installation, prepare the Helm release names as environment variables for both the server and client installs to use. +Before proceeding with installation, prepare the Helm release names as environment variables for both the server and client install. ```shell-session $ export HELM_RELEASE_SERVER=server @@ -34,8 +41,7 @@ Before you proceed with installation, prepare the Helm release names as environm ## Deploying Consul servers and clients in the first cluster -First, we will deploy the Consul servers with Consul clients in the first cluster. -For that, we will use the following Helm configuration: +First, deploy the first cluster with Consul Servers and Clients with the example Helm configuration below. @@ -61,30 +67,30 @@ ui: -Note that we are deploying in a secure configuration, with gossip encryption, -TLS for all components, and ACLs. We are enabling the Consul Service Mesh and the controller for CRDs -so that we can use them to later verify that our services can connect with each other across clusters. +Note that this will deploy a secure configuration with gossip encryption, +TLS for all components and ACLs. In addition, this will enable the Consul Service Mesh and the controller for CRDs +that can be used later to verify the connectivity of services across clusters. -We're also setting UI's service type to be `NodePort`. -This is needed so that we can connect to servers from another cluster without using the pod IPs of the servers, +The UI's service type is set to be `NodePort`. +This is needed to connect to servers from another cluster without using the pod IPs of the servers, which are likely going to change. -To deploy, first we need to generate the Gossip encryption key and save it as a Kubernetes secret. +To deploy, first generate the Gossip encryption key and save it as a Kubernetes secret. 
```shell $ kubectl create secret generic consul-gossip-encryption-key --from-literal=key=$(consul keygen) ``` -Now we can install our Consul cluster with Helm: +Now install the Consul cluster with Helm: ```shell-session $ helm install ${HELM_RELEASE_SERVER} --values cluster1-config.yaml hashicorp/consul ``` -Once the installation finishes and all components are running and ready, -we need to extract the gossip encryption key we've created, the CA certificate -and the ACL bootstrap token generated during installation, -so that we can apply them to our second Kubernetes cluster. +Once the installation finishes and all components are running and ready, the following information needs to be extracted (using the command below) and applied to the second Kubernetes cluster. + * The Gossip encryption key created + * The CA certificate generated during installation + * The ACL bootstrap token generated during installation ```shell-session $ kubectl get secret consul-gossip-encryption-key ${HELM_RELEASE_SERVER}-consul-ca-cert ${HELM_RELEASE_SERVER}-consul-bootstrap-acl-token --output yaml > cluster1-credentials.yaml @@ -93,15 +99,19 @@ $ kubectl get secret consul-gossip-encryption-key ${HELM_RELEASE_SERVER}-consul- ## Deploying Consul clients in the second cluster ~> **Note:** If multiple Kubernetes clusters will be joined to the Consul Datacenter, then the following instructions will need to be repeated for each additional Kubernetes cluster. -Now we can switch to the second Kubernetes cluster where we will deploy only the Consul clients +Switch to the second Kubernetes cluster to deploy only the Consul clients that will join the first Consul cluster. -First, we need to apply credentials we've extracted from the first cluster to the second cluster: +```shell-session +$ kubectl config use-context +``` + +First, apply the credentials extracted from the first cluster to the second cluster: ```shell-session $ kubectl apply --filename cluster1-credentials.yaml ``` -To deploy in the second cluster, we will use the following Helm configuration: +To deploy in the second cluster, the following example Helm configuration will be used: @@ -145,14 +155,12 @@ connectInject: -Note that we're referencing secrets from the first cluster in ACL, gossip, and TLS configuration. - -Next, we need to set up the `externalServers` configuration. +Note the references to the secrets extracted and applied from the first cluster in ACL, gossip, and TLS configuration. The `externalServers.hosts` and `externalServers.httpsPort` refer to the IP and port of the UI's NodePort service deployed in the first cluster. Set the `externalServers.hosts` to any Node IP of the first cluster, -which you can see by running `kubectl get nodes --output wide`. +which can be seen by running `kubectl get nodes --output wide`. Set `externalServers.httpsPort` to the `nodePort` of the `cluster1-consul-ui` service. In our example, the port is `31557`. @@ -162,37 +170,37 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE cluster1-consul-ui NodePort 10.0.240.80 443:31557/TCP 40h ``` -We set the `externalServer.tlsServerName` to `server.dc1.consul`. This the DNS SAN +Set the `externalServer.tlsServerName` to `server.dc1.consul`. This is the DNS SAN (Subject Alternative Name) that is present in the Consul server's certificate. -We need to set it because we're connecting to the Consul servers over the node IP, +This is required because the connection to the Consul servers uses the node IP, but that IP isn't present in the server's certificate.
-To make sure that the hostname verification succeeds during the TLS handshake, we need to set the TLS +To make sure that the hostname verification succeeds during the TLS handshake, set the TLS server name to a DNS name that *is* present in the certificate. -Next, we need to set `externalServers.k8sAuthMethodHost` to the address of the second Kubernetes API server. -This should be the address that is reachable from the first cluster, and so it cannot be the internal DNS +Next, set `externalServers.k8sAuthMethodHost` to the address of the second Kubernetes API server. +This should be the address that is reachable from the first cluster, so it cannot be the internal DNS available in each Kubernetes cluster. Consul needs it so that `consul login` with the Kubernetes auth method will work from the second cluster. More specifically, the Consul server will need to perform the verification of the Kubernetes service account -whenever `consul login` is called, and to verify service accounts from the second cluster it needs to +whenever `consul login` is called, and to verify service accounts from the second cluster, it needs to reach the Kubernetes API in that cluster. -The easiest way to get it is to set it from your `kubeconfig` by running `kubectl config view` and grabbing +The easiest way to get it is from the `kubeconfig` by running `kubectl config view` and grabbing the value of `cluster.server` for the second cluster. -Lastly, we need to set up the clients so that they can discover the servers in the first cluster. -For this, we will use Consul's cloud auto-join feature -for the [Kubernetes provider](/docs/install/cloud-auto-join#kubernetes-k8s). -To use it we need to provide a way for the Consul clients to reach the first Kubernetes cluster. -To do that, we need to save the `kubeconfig` for the first cluster as a Kubernetes secret in the second cluster -and reference it in the `clients.join` value. Note that we're making that secret available to the client pods +Lastly, set up the clients so that they can discover the servers in the first cluster. +For this, Consul's cloud auto-join feature +for the [Kubernetes provider](/docs/install/cloud-auto-join#kubernetes-k8s) can be used. + +This can be configured by saving the `kubeconfig` for the first cluster as a Kubernetes secret in the second cluster +and referencing it in the `clients.join` value. Note that the secret is made available to the client pods by setting it in `client.extraVolumes`. -~> **Note:** The kubeconfig you're providing to the client should have minimal permissions. +~> **Note:** The kubeconfig provided to the client should have minimal permissions. The cloud auto-join provider will only need permission to read pods. Please see [Kubernetes Cloud auto-join](/docs/install/cloud-auto-join#kubernetes-k8s) for more details. -Now we're ready to install! +Now, proceed with the installation of the second cluster. ```shell-session $ helm install ${HELM_RELEASE_CLIENT} --values cluster2-config.yaml hashicorp/consul @@ -200,12 +208,11 @@ $ helm install ${HELM_RELEASE_CLIENT} --values cluster2-config.yaml hashicorp/co ## Verifying the Consul Service Mesh works -~> When Transparent proxy is enabled, services in one Kubernetes cluster that need to communicate with a service in another Kubernetes cluster must have a explicit upstream configured through the ["consul.hashicorp.com/connect-service-upstreams"](/docs/k8s/annotations-and-labels#consul-hashicorp-com-connect-service-upstreams) annotation. 
+~> When Transparent proxy is enabled, services in one Kubernetes cluster that need to communicate with a service in another Kubernetes cluster must have an explicit upstream configured through the ["consul.hashicorp.com/connect-service-upstreams"](/docs/k8s/annotations-and-labels#consul-hashicorp-com-connect-service-upstreams) annotation. -Now that we have our Consul cluster in multiple k8s clusters up and running, we will -deploy two services and verify that they can connect to each other. +Now that the Consul cluster spanning across multiple k8s clusters is up and running, deploy two services in separate k8s clusters and verify that they can connect to each other. -First, we'll deploy `static-server` service in the first cluster: +First, deploy `static-server` service in the first cluster: @@ -271,9 +278,9 @@ spec: -Note that we're defining a Service intention so that our services are allowed to talk to each other. +Note that defining a Service intention is required so that our services are allowed to talk to each other. -Then we'll deploy `static-client` in the second cluster with the following configuration: +Next, deploy `static-client` in the second cluster with the following configuration: @@ -321,9 +328,11 @@ spec: -Once both services are up and running, we can connect to the `static-server` from `static-client`: +Once both services are up and running, try connecting to the `static-server` from `static-client`: ```shell-session $ kubectl exec deploy/static-client -- curl --silent localhost:1234 "hello world" ``` + +A successful installation would return `hello world` for the above curl command output. diff --git a/website/content/docs/k8s/installation/vault/data-integration/bootstrap-token.mdx b/website/content/docs/k8s/installation/vault/data-integration/bootstrap-token.mdx index aa132e4b3..3d5a3d39a 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/bootstrap-token.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/bootstrap-token.mdx @@ -9,14 +9,13 @@ description: >- This topic describes how to configure the Consul Helm chart to use an ACL bootstrap token stored in Vault. ## Overview -To use an ACL bootstrap token stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: - -### One time setup in Vault +To use an ACL bootstrap token stored in Vault, follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section. +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -24,20 +23,20 @@ To use an ACL bootstrap token stored in Vault, we will follow the steps outlined Prior to setting up the data integration between Vault and Consul on Kubernetes, you will need to have: 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 
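The next two sections store the bootstrap token and create the read-only policy with the Vault CLI. For operators who script this one-time setup instead, roughly equivalent calls through Vault's Go API client could look like the sketch below; it assumes `VAULT_ADDR` and `VAULT_TOKEN` are set in the environment and that the KV v2 secrets engine is mounted at `secret/`, and it reuses the `secret/consul/bootstrap-token` path and `bootstrap-token-policy` name from the CLI steps.

```go
package main

import (
	"log"

	uuid "github.com/hashicorp/go-uuid"
	vault "github.com/hashicorp/vault/api"
)

func main() {
	// The client picks up VAULT_ADDR and VAULT_TOKEN from the environment.
	client, err := vault.NewClient(vault.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Generate a token and store it at the path targeted by
	// `vault kv put secret/consul/bootstrap-token ...`. With KV v2 mounted
	// at secret/, the API path gains a data/ segment and the payload is
	// nested under "data".
	token, err := uuid.GenerateUUID()
	if err != nil {
		log.Fatal(err)
	}
	if _, err := client.Logical().Write("secret/data/consul/bootstrap-token",
		map[string]interface{}{"data": map[string]interface{}{"token": token}}); err != nil {
		log.Fatal(err)
	}

	// Create the read-only policy that the Kubernetes auth role will reference,
	// matching `vault policy write bootstrap-token-policy bootstrap-token-policy.hcl`.
	policy := `path "secret/data/consul/bootstrap-token" {
  capabilities = ["read"]
}`
	if err := client.Sys().PutPolicy("bootstrap-token-policy", policy); err != nil {
		log.Fatal(err)
	}
}
```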
-## One time setup in Vault -### Generate and Store the Secret in Vault -First, generate and store the ACL bootstrap token in Vault: +## Store the Secret in Vault + +First, generate and store the ACL bootstrap token in Vault. You will only need to perform this action once: ```shell-session $ vault kv put secret/consul/bootstrap-token token="$(uuidgen | tr '[:upper:]' '[:lower:]')" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `global.acls.bootstrapToken.secretName` Helm value. +Next, you will need to create a Vault policy that allows read access to this secret. -Next, you will need to create a Vault policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.acls.bootstrapToken.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). @@ -55,8 +54,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write bootstrap-token-policy bootstrap-token-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create Kubernetes auth roles for the Consul `server-acl-init` container that runs as part of the Consul server statefulset: @@ -75,7 +73,7 @@ you can run the following `helm template` command with your Consul on Kubernetes $ helm template --release-name ${RELEASE_NAME} -s templates/server-acl-init-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that you have configured Vault, you can configure the Consul Helm chart to use the ACL bootstrap token in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/connect-ca.mdx b/website/content/docs/k8s/installation/vault/data-integration/connect-ca.mdx index 121bb3ee2..46f53ec97 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/connect-ca.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/connect-ca.mdx @@ -14,12 +14,12 @@ Consul allows using Kubernetes auth methods to configure Connect CA. This allows for automatic token rotation once the renewal is no longer possible. ## Overview -To use an Vault as the Service Mesh Certificate Provider on Kubernetes, we will need to modify the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +To use Vault as the service mesh certificate provider on Kubernetes, you will complete a modified version of the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section. -### One time setup in Vault +Complete the following steps once: 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -28,20 +28,14 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. 
Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). -## One time setup in Vault -### Store the secret in Vault - -This step is not valid to this use case as we are not storing any secrets for Service Mesh certificate, and we instead are Leveraging Vault CA as a provider to mint certificates on an ongoing basis. - -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy To configure [Vault as the provider](/docs/connect/ca/vault) for the Consul service mesh certificates, you will first need to decide on the type of policy that is suitable for you. To see the permissions that Consul would need in Vault, please see [Vault ACL policies](/docs/connect/ca/vault#vault-acl-policies) documentation. -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create Kubernetes auth roles for the Consul servers: @@ -60,7 +54,7 @@ you can run: $ helm template --release-name ${RELEASE_NAME} --show-only templates/server-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now you can configure the Consul Helm chart to use Vault as the Connect CA provider: diff --git a/website/content/docs/k8s/installation/vault/data-integration/enterprise-license.mdx b/website/content/docs/k8s/installation/vault/data-integration/enterprise-license.mdx index 4333a7162..08d7e16f1 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/enterprise-license.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/enterprise-license.mdx @@ -9,13 +9,13 @@ description: >- This topic describes how to configure the Consul Helm chart to use an enterprise license stored in Vault. ## Overview -To use an enterprise license stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +Complete the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section to use an enterprise license stored in Vault. -### One time setup in Vault +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -24,8 +24,7 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 
-## One time setup in Vault -### Store the Secret in Vault +## Store the Secret in Vault First, store the enterprise license in Vault: @@ -33,11 +32,11 @@ First, store the enterprise license in Vault: $ vault kv put secret/consul/license key="" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `global.enterpriseLicense.secretName` Helm value. +Next, you will need to create a policy that allows read access to this secret. -Next, you will need to create a policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.enterpriseLicense.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). @@ -55,8 +54,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write license-policy license-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create Kubernetes auth roles for the Consul server and client: @@ -89,7 +87,7 @@ you can run the following `helm template` commands with your Consul on Kubernete $ helm template --release-name ${RELEASE_NAME} -s templates/client-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart. +## Update Consul on Kubernetes Helm chart Now that you have configured Vault, you can configure the Consul Helm chart to use the enterprise license in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/gossip.mdx b/website/content/docs/k8s/installation/vault/data-integration/gossip.mdx index 57480a9ec..828b19307 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/gossip.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/gossip.mdx @@ -7,14 +7,16 @@ description: >- # Storing Gossip Encryption Key in Vault -## Overview -To use a gossip encryption key stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +This topic describes how to configure the Consul Helm chart to use a gossip encryption key stored in Vault. -### One time setup in Vault +## Overview +Complete the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section to use a gossip encryption key stored in Vault. + +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -23,18 +25,17 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2.
Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). -## One time setup in Vault -### Store the Secret in Vault -First, generate and store the gossip key in Vault: +## Store the Secret in Vault +First, generate and store the gossip key in Vault. You will only need to perform this action once: ```shell-session $ vault kv put secret/consul/gossip key="$(consul keygen)" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `global.gossipEncryption.secretName` Helm value. +Next, create a policy that allows read access to this secret. -Next, we will need to create a policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.gossipEncryption.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). @@ -52,8 +53,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write gossip-policy gossip-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, we will create Kubernetes auth roles for the Consul server and client: @@ -86,7 +86,7 @@ you can run the following `helm template` commands with your Consul on Kubernete $ helm template --release-name ${RELEASE_NAME} -s templates/client-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that we've configured Vault, you can configure the Consul Helm chart to use the gossip key in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/index.mdx b/website/content/docs/k8s/installation/vault/data-integration/index.mdx index 5007fcfe9..a7669f549 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/index.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/index.mdx @@ -13,13 +13,13 @@ This topic describes an overview of how to configure Vault and Consul in order t ### General Integration Steps -Generally, for each secret you wish to store in Vault, the process to integrate the data between Vault and Consul on Kubernetes is: +You must complete two general procedures for each secret you wish to store in Vault. -#### One time setup in Vault +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -#### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -31,14 +31,13 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, Following the general integration steps, a more detailed workflow for integration of the [Gossip encryption key](/docs/k8s/installation/vault/data-integration/gossip) with the Vault Secrets backend would like the following: -#### One time setup in Vault +Complete the following steps once: 1. Store the secret in Vault. 
- Save the gossip encryption key in Vault at the path `secret/consul/gossip`. 1. Create a Vault policy that authorizes the desired level of access to the secret. - Create a Vault policy that you name `gossip-policy` which allows `read` access to the path `secret/consul/gossip`. -#### Setup per Consul datacenter - +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. - Both Consul servers and Consul clients need access to the gossip encryption key, so you create two Vault Kubernetes: - A role called `consul-server` that maps the Kubernetes namespace and service account name for your consul servers to the `gossip-policy` created in [step 2](#one-time-setup-in-vault) of One time setup in Vault. diff --git a/website/content/docs/k8s/installation/vault/data-integration/partition-token.mdx b/website/content/docs/k8s/installation/vault/data-integration/partition-token.mdx index 2e7c7e68c..98c764fc5 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/partition-token.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/partition-token.mdx @@ -10,13 +10,13 @@ description: >- This topic describes how to configure the Consul Helm chart to use an ACL partition token stored in Vault. ## Overview -To use an ACL partition token stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +Complete the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section to use an ACL partition token stored in Vault. -### One time setup in Vault +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -25,20 +25,19 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). -## One time setup in Vault -### Generate and Store the Secret in Vault +## Store the Secret in Vault -First, generate and store the ACL partition token in Vault: +First, generate and store the ACL partition token in Vault. You will only need to perform this action once: ```shell-session $ vault kv put secret/consul/partition-token token="$(uuidgen | tr '[:upper:]' '[:lower:]')" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `global.acls.partitionToken.secretName` Helm value. +Next, you will need to create a policy that allows read access to this secret. 
-Next, you will need to create a policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.acls.partitionToken.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). @@ -56,8 +55,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write partition-token-policy partition-token-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create Kubernetes auth roles for the Consul `server-acl-init` job: @@ -76,7 +74,7 @@ you can run the following `helm template` command with your Consul on Kubernetes $ helm template --release-name ${RELEASE_NAME} -s templates/server-acl-init-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that you have configured Vault, you can configure the Consul Helm chart to use the ACL partition token key in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/replication-token.mdx b/website/content/docs/k8s/installation/vault/data-integration/replication-token.mdx index ed40fdea5..f17c3fcb7 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/replication-token.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/replication-token.mdx @@ -9,13 +9,13 @@ description: >- This topic describes how to configure the Consul Helm chart to use an ACL replication token stored in Vault. ## Overview -To use an ACL replication token stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +To use an ACL replication token stored in Vault, follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section. -### One time setup in Vault +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -24,20 +24,19 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). -## One time setup in Vault -### Generate and Store the Secret in Vault +## Store the Secret in Vault -First, generate and store the ACL replication token in Vault: +First, generate and store the ACL replication token in Vault. 
You will only need to perform this action once: ```shell-session $ vault kv put secret/consul/replication-token token="$(uuidgen | tr '[:upper:]' '[:lower:]')" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `global.acls.replicationToken.secretName` Helm value. +Next, you will need to create a policy that allows read access to this secret. -Next, you will need to create a policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.acls.replicationToken.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). @@ -55,8 +54,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write replication-token-policy replication-token-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create Kubernetes auth roles for the Consul `server-acl-init` job: @@ -75,7 +73,7 @@ you can run the following `helm template` command with your Consul on Kubernetes $ helm template --release-name ${RELEASE_NAME} -s templates/server-acl-init-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that you have configured Vault, you can configure the Consul Helm chart to use the ACL replication token key in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/server-tls.mdx b/website/content/docs/k8s/installation/vault/data-integration/server-tls.mdx index 38626c9dd..382a53ed9 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/server-tls.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/server-tls.mdx @@ -8,12 +8,12 @@ description: >- # Vault as the Server TLS Certificate Provider on Kubernetes ## Overview -To use an Vault as the Server TLS Certificate Provider on Kubernetes, we will need to modify the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: +To use Vault as the server TLS certificate provider on Kubernetes, complete a modified version of the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section. -### One time setup in Vault +Complete the following steps once: 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: 1. (Added) Configure allowed domains for PKI certificates 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -24,11 +24,10 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 3. Complete the [Bootstrapping the PKI Engine](#bootstrapping-the-pki-engine) section. 
-### Bootstrapping the PKI Engine +## Bootstrapping the PKI Engine -First, we need to bootstrap the Vault cluster by enabling and configuring the PKI Secrets Engine to be able to serve -TLS certificates to Consul. The process can be as simple as the following, or more complicated such as in this [example](https://learn.hashicorp.com/tutorials/consul/vault-pki-consul-secure-tls) -which also uses an intermediate signing authority. +Issue the following commands to enable and configure the PKI Secrets Engine to serve +TLS certificates to Consul. * Enable the PKI Secrets Engine: @@ -51,142 +50,128 @@ which also uses an intermediate signing authority. common_name="dc1.consul" \ ttl=87600h ``` -## One time setup in Vault -### Store the secret in Vault - -This step is not valid to this use case because we are not storing a single secret. We are configuring Vault as a provider to mint certificates on an ongoing basis. - -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policies To use Vault to issue Server TLS certificates, you will need to create the following: -1. Vault Policies that will allow the Consul server to access the certificate issuing url. -1. Vault Policies that will allow the Consul components, e.g. ingress gateways, controller, to access the CA url. +1. Create a policy that allows `["create", "update"]` access to the + [certificate issuing URL](https://www.vaultproject.io/api/secret/pki#generate-certificate) so the Consul servers can + fetch a new certificate/key pair. -#### Create Vault Policies for the Server TLS Certificates + The path to the secret referenced in the `path` resource is the same value that you will configure in the `server.serverCert.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). --> **Note:** The PKI secret path referenced by the Vault Policy below will be your `server.serverCert.secretName` Helm value. + -Next we will create a policy that allows `["create", "update"]` access to the -[certificate issuing URL](https://www.vaultproject.io/api/secret/pki#generate-certificate) so the Consul servers can -fetch a new certificate/key pair. + ```HCL + path "pki/issue/consul-server" { + capabilities = ["create", "update"] + } + ``` - + -```HCL -path "pki/issue/consul-server" { - capabilities = ["create", "update"] -} -``` +1. Apply the Vault policy by issuing the `vault policy write` CLI command: - + ```shell-session + $ vault policy write consul-server consul-server-policy.hcl + ``` -Apply the Vault policy by issuing the `vault policy write` CLI command: - -```shell-session -$ vault policy write consul-server consul-server-policy.hcl -``` - -#### Create Vault Policies for the CA URL - -Next, we will create a policy that allows `["read"]` access to the [CA URL](https://www.vaultproject.io/api/secret/pki#read-certificate), +1. Create a policy that allows `["read"]` access to the [CA URL](https://www.vaultproject.io/api/secret/pki#read-certificate), this is required for the Consul components to communicate with the Consul servers in order to fetch their auto-encryption certificates. + + The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.tls.caCert.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)).
- + -```HCL -path "pki/cert/ca" { - capabilities = ["read"] -} -``` + ```HCL + path "pki/cert/ca" { + capabilities = ["read"] + } + ``` - + -```shell-session -$ vault policy write ca-policy ca-policy.hcl -``` + ```shell-session + $ vault policy write ca-policy ca-policy.hcl + ``` --> **Note:** The PKI secret path referenced by the above Policy will be your `global.tls.caCert.secretName` Helm value. +1. Configure allowed domains for PKI certificates. -## Setup per Consul datacenter -### Configure allowed domains for PKI certificates + Next, a Vault role for the PKI engine will set the default certificate issuance parameters: -Next, a Vault role for the PKI engine will set the default certificate issuance parameters: + ```shell-session + $ vault write pki/roles/consul-server \ + allowed_domains="" \ + allow_subdomains=true \ + allow_bare_domains=true \ + allow_localhost=true \ + generate_lease=true \ + max_ttl="720h" + ``` -```shell-session -$ vault write pki/roles/consul-server \ - allowed_domains="" \ - allow_subdomains=true \ - allow_bare_domains=true \ - allow_localhost=true \ - generate_lease=true \ - max_ttl="720h" -``` + To generate the `` use the following script as a template: -To generate the `` use the following script as a template: + ```shell-session + #!/bin/sh -```shell-session -#!/bin/sh + # NAME is set to either the value from `global.name` from your Consul K8s value file, or your $HELM_RELEASE_NAME-consul + export NAME=consulk8s + # NAMESPACE is where the Consul on Kubernetes is installed + export NAMESPACE=consul + # DATACENTER is the value of `global.datacenter` from your Helm values config file + export DATACENTER=dc1 -# NAME is set to either the value from `global.name` from your Consul K8s value file, or your $HELM_RELEASE_NAME-consul -export NAME=consulk8s -# NAMESPACE is where the Consul on Kubernetes is installed -export NAMESPACE=consul -# DATACENTER is the value of `global.datacenter` from your Helm values config file -export DATACENTER=dc1 + echo allowed_domains=\"$DATACENTER.consul, $NAME-server, $NAME-server.$NAMESPACE, $NAME-server.$NAMESPACE.svc\" + ``` -echo allowed_domains=\"$DATACENTER.consul, $NAME-server, $NAME-server.$NAMESPACE, $NAME-server.$NAMESPACE.svc\" -``` +1. Finally, Kubernetes auth roles need to be created for servers, clients, and components. -### Link the Vault policies to Consul workloads -Create three Vault auth roles, one for the Consul servers, one for the Consul clients, and one for Consul components, that link the policy to each Consul workload on Kubernetes service account that requires access. 
+ Role for Consul servers: + ```shell-session + $ vault write auth/kubernetes/role/consul-server \ + bound_service_account_names= \ + bound_service_account_namespaces= \ + policies=consul-server \ + ttl=1h + ``` -Role for Consul servers: -```shell-session -$ vault write auth/kubernetes/role/consul-server \ - bound_service_account_names= \ - bound_service_account_namespaces= \ - policies=consul-server \ - ttl=1h -``` + To find out the service account name of the Consul server, + you can run: -To find out the service account name of the Consul server, -you can run: + ```shell-session + $ helm template --release-name ${RELEASE_NAME} --show-only templates/server-serviceaccount.yaml hashicorp/consul + ``` -```shell-session - $ helm template --release-name ${RELEASE_NAME} --show-only templates/server-serviceaccount.yaml hashicorp/consul -``` + Role for Consul clients: -Role for Consul clients: + ```shell-session + $ vault write auth/kubernetes/role/consul-client \ + bound_service_account_names= \ + bound_service_account_namespaces=default \ + policies=ca-policy \ + ttl=1h + ``` -```shell-session -$ vault write auth/kubernetes/role/consul-client \ - bound_service_account_names= \ - bound_service_account_namespaces=default \ - policies=ca-policy \ - ttl=1h -``` + To find out the service account name of the Consul client, use the command below. + ```shell-session + $ helm template --release-name ${RELEASE_NAME} --show-only templates/client-serviceaccount.yaml hashicorp/consul + ``` -To find out the service account name of the Consul client, use the command below. -```shell-session - $ helm template --release-name ${RELEASE_NAME} --show-only templates/client-serviceaccount.yaml hashicorp/consul -``` + Role for CA components: + ```shell-session + $ vault write auth/kubernetes/role/consul-ca \ + bound_service_account_names="*" \ + bound_service_account_namespaces= \ + policies=ca-policy \ + ttl=1h + ``` -Role for CA components: -```shell-session -$ vault write auth/kubernetes/role/consul-ca \ - bound_service_account_names="*" \ - bound_service_account_namespaces= \ - policies=ca-policy \ - ttl=1h -``` + The above Vault Roles will now be your Helm values for `global.secretsBackend.vault.consulServerRole` and + `global.secretsBackend.vault.consulCARole` respectively. -The above Vault Roles will now be your Helm values for `global.secretsBackend.vault.consulServerRole` and -`global.secretsBackend.vault.consulCARole` respectively. +## Update Consul on Kubernetes Helm chart -### Update the Consul on Kubernetes helm chart - -Now that we've configured Vault, you can configure the Consul Helm chart to -use the Server TLS certificates from Vault: +Next, configure the Consul Helm chart to +use the server TLS certificates from Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/snapshot-agent-config.mdx b/website/content/docs/k8s/installation/vault/data-integration/snapshot-agent-config.mdx index b8e4a1646..6a0d913cd 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/snapshot-agent-config.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/snapshot-agent-config.mdx @@ -9,15 +9,13 @@ description: >- This topic describes how to configure the Consul Helm chart to use a snapshot agent config stored in Vault. 
## Overview -To use an ACL replication token stored in Vault, we will follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: - -### One time setup in Vault +To use a snapshot agent config stored in Vault, follow the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section. +Complete the following steps once: 1. Store the secret in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secret. -### Setup per Consul datacenter - +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. @@ -26,8 +24,7 @@ Prior to setting up the data integration between Vault and Consul on Kubernetes, 1. Read and completed the steps in the [Systems Integration](/docs/k8s/installation/vault/systems-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 2. Read the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). -## One time setup in Vault -### Store the Secret in Vault +## Store the Secret in Vault First, store the snapshot agent config in Vault: @@ -35,11 +32,11 @@ First, store the snapshot agent config in Vault: $ vault kv put secret/consul/snapshot-agent-config key="" ``` -### Create a Vault policy that authorizes the desired level of access to the secret +## Create Vault policy --> **Note:** The secret path referenced by the Vault Policy below will be your `client.snapshotAgent.configSecret.secretName` Helm value. +Next, you will need to create a policy that allows read access to this secret. -Next, you will need to create a policy that allows read access to this secret: +The path to the secret referenced in the `path` resource is the same value that you will configure in the `client.snapshotAgent.configSecret.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)).
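For reference, the read-only policy for this example could look like the following sketch (this assumes the config was stored at `secret/consul/snapshot-agent-config` with the KV v2 engine mounted at `secret/`, so the API path includes `data/`; the exact contents of `snapshot-agent-config-policy.hcl` in your environment may differ):

```HCL
path "secret/data/consul/snapshot-agent-config" {
  capabilities = ["read"]
}
```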
@@ -57,8 +54,7 @@ Apply the Vault policy by issuing the `vault policy write` CLI command: $ vault policy write snapshot-agent-config-policy snapshot-agent-config-policy.hcl ``` -## Setup per Consul datacenter -### Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access +## Create Vault Authorization Roles for Consul Next, you will create a Kubernetes auth role for the Consul snapshot agent: @@ -77,10 +73,10 @@ you can run the following `helm template` command with your Consul on Kubernetes $ helm template --release-name ${RELEASE_NAME} -s templates/client-snapshot-agent-serviceaccount.yaml hashicorp/consul ``` -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that you have configured Vault, you can configure the Consul Helm chart to -use the snapshot agent config in Vault: +use the snapshot agent configuration in Vault: diff --git a/website/content/docs/k8s/installation/vault/data-integration/webhook-certs.mdx b/website/content/docs/k8s/installation/vault/data-integration/webhook-certs.mdx index 028ebb496..4615a040c 100644 --- a/website/content/docs/k8s/installation/vault/data-integration/webhook-certs.mdx +++ b/website/content/docs/k8s/installation/vault/data-integration/webhook-certs.mdx @@ -21,7 +21,7 @@ When Vault is configured as the controller and connect inject Webhook Certificat To use Vault as the controller and connect inject Webhook Certificate Provider, we will need to modify the steps outlined in the [Data Integration](/docs/k8s/installation/vault/data-integration) section: -### Setup per Consul datacenter +Repeat the following steps for each datacenter: 1. Create a Vault policy that authorizes the desired level of access to the secret. 1. (Added) Create Vault PKI roles for controller and connect inject each that establish the domains that each is allowed to issue certificates for. 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. @@ -33,11 +33,9 @@ Complete the following prerequisites prior to implementing the integration descr 1. You should be familiar with the [Data Integration Overview](/docs/k8s/installation/vault/data-integration) section of [Vault as a Secrets Backend](/docs/k8s/installation/vault). 1. Configure [Vault as the Server TLS Certificate Provider on Kubernetes](/docs/k8s/installation/vault/data-integration/server-tls) 1. Configure [Vault as the Service Mesh Certificate Provider on Kubernetes](/docs/k8s/installation/vault/data-integration/connect-ca) -1. Complete the [Bootstrapping the PKI Engine for Controller and Connect Inject Webhooks](#bootstrapping-the-pki-engine-for-controller-and-connect-inject-webhooks) section. -### Bootstrapping the PKI Engine for Controller and Connect Inject Webhooks - -The first step is to bootstrap the Vault cluster. Issue the following commands to enable and configure the PKI Secrets Engine to serve TLS certificates for the controller and connect inject webhooks: +## Bootstrapping the PKI Engines +Issue the following commands to enable and configure the PKI Secrets Engine to serve TLS certificates for the controller and connect inject webhooks: * Mount the PKI Secrets Engine for each: @@ -72,138 +70,124 @@ The first step is to bootstrap the Vault cluster.
Issue the following commands t common_name="-connect-injector" \ ttl=87600h ``` -## Setup per Consul datacenter -You will need to preform the following steps for each datacenter that you would like to manage controller and connect inject webhook certificates in Vault. You will want to take care to create different names per datacenter for every pki mount, role, and policy. +## Create Vault Policies +1. Create a policy that allows `["create", "update"]` access to the +[certificate issuing URL](https://www.vaultproject.io/api/secret/pki#generate-certificate) so Consul controller and connect inject can fetch a new certificate/key pair and provide it to the Kubernetes `mutatingwebhookconfiguration`. -### Create a Vault policy that authorizes the desired level of access to the secret -To use Vault to issue controller or connect inject webhook certificates, you will need to create the Vault policies that will allow either controller or connect inject to access its respective certificate-issuing URL. + The path to the secret referenced in the `path` resource is the same value that you will configure in the `global.secretsBackend.vault.controller.tlsCert.secretName` and `global.secretsBackend.vault.connectInject.tlsCert.secretName` Helm configuration (refer to [Update Consul on Kubernetes Helm chart](#update-consul-on-kubernetes-helm-chart)). -#### Create Vault Policies for the Controller and Connect Inject Webhook Certificates + ```shell-session + $ vault policy write controller-tls-policy - < **Note:** The PKI secret paths referenced by the Vault Policies below will be your `global.secretsBackend.vault.controller.tlsCert.secretName` and `global.secretsBackend.vault.connectInject.tlsCert.secretName` Helm values respectively. + ```shell-session + $ vault policy write connect-inject-policy - <` for each use the following script as a template: + + ```shell-session + #!/bin/sh + + # NAME is set to either the value from `global.name` from your Consul K8s value file, or your $HELM_RELEASE_NAME-consul + export NAME=consulk8s + # NAMESPACE is where the Consul on Kubernetes is installed + export NAMESPACE=consul + # DATACENTER is the value of `global.datacenter` from your Helm values config file + export DATACENTER=dc1 + + echo allowed_domains_controller=\"${NAME}-controller-webhook,${NAME}-controller-webhook.${NAMESPACE},${NAME}-controller-webhook.${NAMESPACE}.svc,${NAME}-controller-webhook.${NAMESPACE}.svc.cluster.local\"" + + echo allowed_domains_connect_inject=\"${NAME}-connect-injector,${NAME}-connect-injector.${NAMESPACE},${NAME}-connect-injector.${NAMESPACE}.svc,${NAME}-connect-injector.${NAMESPACE}.svc.cluster.local\"" + ``` + +1. Finally, Kubernetes auth roles need to be created for controller and connect inject webhooks. -```shell-session -$ vault policy write controller-tls-policy - < \ + bound_service_account_namespaces= \ + policies=controller-ca-policy \ + ttl=1h + ``` --> **Note:** The PKI secret paths referenced by the Vault Policies below will be your `global.secretsBackend.vault.controller.caCert.secretName` and `global.secretsBackend.vault.connectInject.caCert.secretName` Helm values respectively. + To find out the service account name of the Consul controller, + you can run: -Next, create a policy that allows `["read"]` access to the [CA URL](https://www.vaultproject.io/api/secret/pki#read-certificate). The policy is required so that Consul components can communicate with the Consul servers in order to fetch their auto-encryption certificates. 
Issue the following commands to create the policy: + ```shell-session + $ helm template --release-name ${RELEASE_NAME} --show-only templates/controller-serviceaccount.yaml hashicorp/consul + ``` -```shell-session -$ vault policy write controller-ca-policy - < \ + bound_service_account_namespaces= \ + policies=connect-inject-ca-policy \ + ttl=1h + ``` -Issue the following command to create a Vault role for the controller PKI engine and set the default parameters for issuing certificates: + To find out the service account name of the Consul connect inject, use the command below. + ```shell-session + $ helm template --release-name ${RELEASE_NAME} --show-only templates/connect-inject-serviceaccount.yaml hashicorp/consul + ``` -```shell-session -$ vault write controller/roles/controller-role \ - allowed_domains="" \ - allow_subdomains=true \ - allow_bare_domains=true \ - allow_localhost=true \ - generate_lease=true \ - max_ttl="720h" -``` - -Issue the following command to create a Vault role for the connect inject PKI engine and set the default parameters for issuing certificates: - -```shell-session -$ vault write connect-inject/roles/connect-inject-role \ - allowed_domains="" \ - allow_subdomains=true \ - allow_bare_domains=true \ - allow_localhost=true \ - generate_lease=true \ - max_ttl="720h" -``` - -To generate the `` for each use the following script as a template: - -```shell-session -#!/bin/sh - -# NAME is set to either the value from `global.name` from your Consul K8s value file, or your $HELM_RELEASE_NAME-consul -export NAME=consulk8s -# NAMESPACE is where the Consul on Kubernetes is installed -export NAMESPACE=consul -# DATACENTER is the value of `global.datacenter` from your Helm values config file -export DATACENTER=dc1 - -echo allowed_domains_controller=\"${NAME}-controller-webhook,${NAME}-controller-webhook.${NAMESPACE},${NAME}-controller-webhook.${NAMESPACE}.svc,${NAME}-controller-webhook.${NAMESPACE}.svc.cluster.local\"" - -echo allowed_domains_connect_inject=\"${NAME}-connect-injector,${NAME}-connect-injector.${NAMESPACE},${NAME}-connect-injector.${NAMESPACE}.svc,${NAME}-connect-injector.${NAMESPACE}.svc.cluster.local\"" -``` - -### Create a Vault auth roles that link the policy to each Consul on Kubernetes service account that requires access - --> **Note:** The Vault auth roles below will be your `global.secretsBackend.vault.controllerRole` and `global.secretsBackend.vault.connectInjectRole` Helm values respectively. - - -Finally, Kubernetes auth roles need to be created for controller and connect inject webhooks. - -Role for Consul controller webhooks: -```shell-session -$ vault write auth/kubernetes/role/controller-role \ - bound_service_account_names= \ - bound_service_account_namespaces= \ - policies=controller-ca-policy \ - ttl=1h -``` - -To find out the service account name of the Consul controller, -you can run: - -```shell-session - $ helm template --release-name ${RELEASE_NAME} --show-only templates/controller-serviceaccount.yaml hashicorp/consul -``` - -Role for Consul connect inject webhooks: - -```shell-session -$ vault write auth/kubernetes/role/connect-inject-role \ - bound_service_account_names= \ - bound_service_account_namespaces= \ - policies=connect-inject-ca-policy \ - ttl=1h -``` - -To find out the service account name of the Consul connect inject, use the command below. 
-```shell-session - $ helm template --release-name ${RELEASE_NAME} --show-only templates/connect-inject-serviceaccount.yaml hashicorp/consul -``` - -### Update the Consul on Kubernetes helm chart +## Update Consul on Kubernetes Helm chart Now that we've configured Vault, you can configure the Consul Helm chart to use the Server TLS certificates from Vault: diff --git a/website/content/docs/k8s/installation/vault/systems-integration.mdx b/website/content/docs/k8s/installation/vault/systems-integration.mdx index 02e01b971..a48bcdb45 100644 --- a/website/content/docs/k8s/installation/vault/systems-integration.mdx +++ b/website/content/docs/k8s/installation/vault/systems-integration.mdx @@ -8,24 +8,20 @@ description: >- # Vault as the Secrets Backend - Systems Integration ## Overview -At a high level, configuring a systems integration of Vault with Consul on Kubernetes consists of 1) a one time setup on Vault and 2) a setup of the secrets backend per Consul datacenter via Helm. +Integrating Vault with Consul on Kubernetes includes a one-time setup on Vault and setting up the secrets backend for each Consul datacenter via Helm. -### One time setup on Vault +Complete the following steps once: - Enabling Vault KV Secrets Engine - Version 2 to store arbitrary secrets - Enabling Vault PKI Engine if you are choosing to store and manage either [Consul Server TLS credentials](/docs/k8s/installation/vault/data-integration/server-tls) or [Service Mesh and Consul client TLS credentials](/docs/k8s/installation/vault/data-integration/connect-ca) -### Setup per Consul datacenter +Repeat the following steps for each datacenter in the cluster: - Installing the Vault Injector within the Consul datacenter installation - Configuring a Kubernetes Auth Method in Vault to authenticate and authorize operations from the Consul datacenter - Enable Vault as the Secrets Backend in the Consul datacenter -## One time setup on Vault - -A one time setup on a Vault deployment is necessary to enable both the Vault KV Secrets Engine and the Vault PKI Engine. These docs assume that you have already setup a Vault cluster for use with Consul on Kubernetes. - Please read [Run Vault on Kubernetes](https://www.vaultproject.io/docs/platform/k8s/helm/run) if instructions on setting up a Vault cluster are needed. -### Vault KV Secrets Engine - Version 2 +## Vault KV Secrets Engine - Version 2 The following secrets can be stored in Vault KV secrets engine, which is meant to handle arbitrary secrets: - ACL Bootstrap token ([`global.acls.bootstrapToken`](/docs/k8s/helm#v-global-acls-bootstraptoken)) @@ -41,7 +37,7 @@ In order to store any of these secrets, we must enable the [Vault KV secrets eng $ vault secrets enable -path=consul kv-v2 ``` -### Vault PKI Engine +## Vault PKI Engine The Vault PKI Engine must be enabled in order to leverage Vault for issuing Consul Server TLS certificates. More details for configuring the PKI Engine is found in [Bootstrapping the PKI Engine](/docs/k8s/installation/vault/data-integration/server-tls#bootstrapping-the-pki-engine) under the Server TLS section. @@ -49,61 +45,83 @@ The Vault PKI Engine must be enabled in order to leverage Vault for issuing Cons $ vault secrets enable pki ``` -## Setup per Consul datacenter - -After configuring Vault, Consul datacenters on Kubernetes must be deployed with the Vault Agent injector and configured to leverage the Vault Kubernetes Auth Method to read secrets from a Vault cluster. 
- -### Set Environment Variables to ensure integration consistency +## Set Environment Variables Before installing the Vault Injector and configuring the Vault Kubernetes Auth Method, some environment variables need to be set to better ensure consistent mapping between Vault and Consul on Kubernetes. -#### DATACENTER - - - **Recommended value:** value of `global.datacenter` in your Consul Helm values file. - ```shell-session - $ export DATACENTER=dc1 - ``` -#### VAULT_AUTH_METHOD_NAME - - - **Recommended value:** a concatenation of a `kubernetes-` prefix (to denote the auth method type) with `DATACENTER` environment variable. - ```shell-session - $ export VAULT_AUTH_METHOD_NAME=kubernetes-${DATACENTER} - ``` + - DATACENTER -#### VAULT_SERVER_HOST + We recommend using the value of `global.datacenter` in your Consul Helm values file for this variable. + ```shell-session + $ export DATACENTER=dc1 + ``` - - **Recommended value:** find the external IP address of your Vault cluster. - - If Vault is installed in a Kubernetes cluster, get the external IP or DNS name of the Vault server load balancer. - - On GKE or AKS, it'll be an IP: - ```shell-session - $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 -o jsonpath='{.status.loadBalancer.ingress[0].ip}') - ``` - - On EKS, it'll be a hostname: - ```shell-session - $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') - ``` - - If Vault is not running on Kubernetes, utilize the `api_addr` as defined in the Vault [High Availability Parameters](https://www.vaultproject.io/docs/configuration#high-availability-parameters) configuration: - ```shell-session - $ export VAULT_SERVER_HOST= - ``` + - VAULT_AUTH_METHOD_NAME + + We recommend using a concatenation of a `kubernetes-` prefix (to denote the auth method type) with the `DATACENTER` environment variable for this variable. + ```shell-session + $ export VAULT_AUTH_METHOD_NAME=kubernetes-${DATACENTER} + ``` -#### VAULT_ADDR + - VAULT_SERVER_HOST + + We recommend using the external IP address of your Vault cluster for this variable. + + If Vault is installed in a Kubernetes cluster, get the external IP or DNS name of the Vault server load balancer. + + + + On EKS, you can get the hostname of the Vault server's load balancer with the following command: - - **Recommended value:** Connecting to port 8200 of the Vault server + ```shell-session + $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') + ``` + + + + + + On GKE, you can get the IP address of the Vault server's load balancer with the following command: + + ```shell-session + $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + ``` + + + + + + On AKS, you can get the IP address of the Vault server's load balancer with the following command: + + ```shell-session + $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 --output jsonpath='{.status.loadBalancer.ingress[0].ip}') + ``` + + + + + If Vault is not running on Kubernetes, utilize the `api_addr` as defined in the Vault [High Availability Parameters](https://www.vaultproject.io/docs/configuration#high-availability-parameters) configuration: + ```shell-session + $ export VAULT_SERVER_HOST= + ``` + + - VAULT_ADDR + + We recommend connecting to port 8200 of the Vault server.
```shell-session $ export VAULT_ADDR=http://${VAULT_SERVER_HOST}:8200 ``` - -> **Note:** If your vault installation is current exposed using SSL, this address will need to use `https` instead of `http`. You will also need to setup the [`VAULT_CACERT`](https://www.vaultproject.io/docs/commands#vault_cacert) environment variable. + If your Vault installation is currently exposed using SSL, this address will need to use `https` instead of `http`. You will also need to set up the [`VAULT_CACERT`](https://www.vaultproject.io/docs/commands#vault_cacert) environment variable. -#### VAULT_TOKEN - - - **Recommended value:** Your allocated Vault token. If running Vault in dev mode, this can be set to to `root`. + - VAULT_TOKEN + + We recommend using your allocated Vault token as the value for this variable. If running Vault in dev mode, this can be set to `root`. ```shell-session - $ export VAULT_ADDR= + $ export VAULT_TOKEN= ``` -### Install Vault Injector in your Consul k8s cluster +## Install Vault Injector in Consul k8s cluster A minimal valid installation of Vault Kubernetes must include the Agent Injector which is utilized for accessing secrets from Vault. Vault servers could be deployed external to Vault on Kubernetes with the [`injector.externalVaultAddr`](https://www.vaultproject.io/docs/platform/k8s/helm/configuration#externalvaultaddr) value in the Vault Helm Configuration. @@ -125,9 +143,7 @@ Issue the Helm `install` command to install the Vault agent injector using the H $ helm install vault-${DATACENTER} -f vault-injector.yaml hashicorp/vault --wait ``` -### Configure the Kubernetes Auth Method in Vault for the datacenter - -#### Enable the Auth Method +## Configure the Kubernetes Auth Method in Vault Ensure that the Vault Kubernetes Auth method is enabled. @@ -135,8 +151,6 @@ Ensure that the Vault Kubernetes Auth method is enabled. $ vault auth enable -path=kubernetes-${DATACENTER} kubernetes ``` -#### Configure Auth Method with JWT token of service account - After enabling the Kubernetes auth method, in Vault, ensure that you have configured the Kubernetes Auth method properly as described in [Kubernetes Auth Method Configuration](https://www.vaultproject.io/docs/auth/kubernetes#configuration). First, while targeting your Consul cluster, get the externally reachable address of the Consul Kubernetes cluster.
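As a hedged illustration (the variable name `KUBE_API_URL` is only an example, and the exact command depends on your kubeconfig and provider), the API server address of the currently targeted cluster can usually be read with `kubectl`:

```shell-session
$ export KUBE_API_URL=$(kubectl config view --minify --output 'jsonpath={.clusters[0].cluster.server}')
```

This value is typically what you supply as the Kubernetes host when configuring the auth method in the next step.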
@@ -157,7 +171,7 @@ $ vault write auth/kubernetes/config \ kubernetes_ca_cert=@/var/run/secrets/kubernetes.io/serviceaccount/ca.crt ``` -#### Enable Vault as the Secrets Backend in the Consul datacenter +## Update Vault Helm chart Finally, you will configure the Consul on Kubernetes helm chart for the datacenter to expect to receive the following values (if you have configured them) to be retrieved from Vault: - ACL Bootstrap token ([`global.acls.bootstrapToken`](/docs/k8s/helm#v-global-acls-bootstraptoken)) - ACL Partition token ([`global.acls.partitionToken`](/docs/k8s/helm#v-global-acls-partitiontoken)) diff --git a/website/content/docs/k8s/installation/vault/wan-federation.mdx b/website/content/docs/k8s/installation/vault/wan-federation.mdx index b454dbec7..37a13d64e 100644 --- a/website/content/docs/k8s/installation/vault/wan-federation.mdx +++ b/website/content/docs/k8s/installation/vault/wan-federation.mdx @@ -78,7 +78,7 @@ In this setup, you will deploy Vault server in the primary datacenter (dc1) Kube - On EKS, you can get the IP address of the Vault server's load balancer with the following command: + On EKS, you can get the hostname of the Vault server's load balancer with the following command: ```shell-session $ export VAULT_SERVER_HOST=$(kubectl get svc vault-dc1 -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') @@ -114,17 +114,18 @@ In this setup, you will deploy Vault server in the primary datacenter (dc1) Kube ``` ## Systems Integration -### Overview -To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must complete following systems integration actions: +There are two main procedures to enable Vault as the service mesh certificate provider in Kubernetes. -- One time setup in Vault +Complete the following steps once: 1. Enabling Vault KV Secrets Engine - Version 2. 1. Enabling Vault PKI Engine. -- Setup per Consul datacenter + +Repeat the following steps for each datacenter in the cluster: 1. Installing the Vault Injector within the Consul datacenter installation 1. Configuring a Kubernetes Auth Method in Vault to authenticate and authorize operations from the Consul datacenter 1. Enable Vault as the Secrets Backend in the Consul datacenter -### One time setup on Vault + +### Configure Vault Secrets engines 1. Enable [Vault KV secrets engine - Version 2](https://www.vaultproject.io/docs/secrets/kv/kv-v2) in order to store the [Gossip Encryption Key](/docs/k8s/helm#v-global-acls-replicationtoken) and the ACL Replication token ([`global.acls.replicationToken`](/docs/k8s/helm#v-global-acls-replicationtoken)). ```shell-session @@ -141,8 +142,7 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co $ vault secrets tune -max-lease-ttl=87600h pki ``` -### Setup per Consul datacenter -#### Primary Datacenter (dc1) +### Primary Datacenter (dc1) 1. Install the Vault Injector in your Consul Kubernetes cluster (dc1), which is used for accessing secrets from Vault. -> **Note**: In the primary datacenter (dc1), you will not have to configure `injector.externalvaultaddr` value because the Vault server is in the same primary datacenter (dc1) cluster. @@ -198,7 +198,7 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co -#### Secondary Datacenter (dc2) +### Secondary Datacenter (dc2) 1. Install the Vault Injector in the secondary datacenter (dc2). 
In the secondary datacenter (dc2), you will configure the `externalvaultaddr` value point to the external address of the Vault server in the primary datacenter (dc1). @@ -300,18 +300,17 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co ## Data Integration -### Overview -To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must complete following data integration actions: +There are two main procedures for using Vault as the service mesh certificate provider in Kubernetes. - -- One time setup in Vault +Complete the following steps once: 1. Store the secrets in Vault. 1. Create a Vault policy that authorizes the desired level of access to the secrets. -- Setup per Consul datacenter + +Repeat the following steps for each datacenter in the cluster: 1. Create Vault Kubernetes auth roles that link the policy to each Consul on Kubernetes service account that requires access. 1. Update the Consul on Kubernetes helm chart. -### One time setup in Vault +### Secrets and Policies 1. Store the ACL Replication Token, Gossip Encryption Key, and Root CA certificate secrets in Vault. ```shell-session @@ -351,13 +350,12 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co EOF ``` -### Setup per Consul datacenter -#### Pre-installation for Primary Datacenter (dc1) +### Pre-installation for Primary Datacenter (dc1) 1. Change your Kubernetes context to target the primary datacenter (dc1): ```shell-session $ kubectl config use-context ``` -#### Primary Datacenter (dc1) +### Primary Datacenter (dc1) 1. Create Server TLS and Service Mesh Cert Policies ```shell-session @@ -491,7 +489,7 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co $ helm install consul-dc1 --values consul-dc1.yaml hashicorp/consul ``` -#### Pre-installation for Secondary Datacenter (dc2) +### Pre-installation for Secondary Datacenter (dc2) 1. Update the Consul on Kubernetes helm chart. For secondary datacenter (dc2), you will need to get the address of the mesh gateway from the **primary datacenter (dc1)** cluster. Keep your Kubernetes context targeting dc1 and set the `MESH_GW_HOST` environment variable that you will use in the Consul Helm chart for secondary datacenter (dc2). @@ -532,7 +530,7 @@ To use Vault as the Service Mesh Certificate Provider in Kubernetes, you must co ```shell-session $ kubectl config use-context ``` -#### Secondary Datacenter (dc2) +### Secondary Datacenter (dc2) 1. Create Server TLS and Service Mesh Cert Policies diff --git a/website/content/docs/upgrading/index.mdx b/website/content/docs/upgrading/index.mdx index 08a79a8a0..e9a2a892c 100644 --- a/website/content/docs/upgrading/index.mdx +++ b/website/content/docs/upgrading/index.mdx @@ -34,15 +34,23 @@ Consul is A, and version B is released. there are no compatibility issues that will affect your workload. If there are plan accordingly before continuing. -2. On each server, install version B of Consul. +2. On each Consul server agent, install version B of Consul. -3. One server at a time, shut down version A via `consul leave` and restart with version B. Wait until - the server is healthy and has rejoined the cluster before moving on to the - next server. +3. One Consul server agent at a time, shut down version A via `consul leave` and restart with version B. Wait until + the server agent is healthy and has rejoined the cluster before moving on to the + next server agent. -4. 
Once all the servers are upgraded, begin a rollout of clients following +4. Once all the server agents are upgraded, begin a rollout of client agents following the same process. + -> **Upgrade Envoy proxies:** If a client agent has associated Envoy proxies (e.g., sidecars, gateways), + install a [compatible Envoy version](/docs/connect/proxies/envoy#supported-versions) + for Consul version B. + After stopping client agent version A, + stop its associated Envoy proxies. + After restarting the client agent with version B, + restart its associated Envoy proxies with the compatible Envoy version. + 5. Done! You are now running the latest Consul agent. You can verify this by running `consul members` to make sure all members have the latest build and highest protocol version. diff --git a/website/content/docs/upgrading/upgrade-specific.mdx b/website/content/docs/upgrading/upgrade-specific.mdx index a9a72c3f9..ec0cf54d5 100644 --- a/website/content/docs/upgrading/upgrade-specific.mdx +++ b/website/content/docs/upgrading/upgrade-specific.mdx @@ -16,6 +16,18 @@ upgrade flow. ## Consul 1.13.0 +### gRPC TLS + +In prior Consul versions if HTTPS was enabled for the client API and exposed +via `ports { https = NUMBER }` then the same TLS material was used to encrypt +the gRPC port used for xDS. Now this is decoupled and activating TLS on the +gRPC endpoint is controlled solely with the gRPC section of the new +[`tls` stanza](/docs/agent/config/config-files#tls-configuration-reference). + +If you have not yet switched to the new `tls` stanza and were NOT using HTTPS +for the API then updating to Consul 1.13 will activate TLS for gRPC since the +deprecated TLS settings are used as defaults. + ### 1.9 Telemetry Compatibility #### Removing configuration options diff --git a/website/content/partials/http_api_and_cli_characteristics_links.mdx b/website/content/partials/http_api_and_cli_characteristics_links.mdx new file mode 100644 index 000000000..0cbb23702 --- /dev/null +++ b/website/content/partials/http_api_and_cli_characteristics_links.mdx @@ -0,0 +1,6 @@ + + +[Required ACLs]: /docs/security/acl +[Blocking queries]: /api-docs/features/blocking +[Consistency modes]: /api-docs/features/consistency +[Agent caching]: /api-docs/features/caching diff --git a/website/data/docs-nav-data.json b/website/data/docs-nav-data.json index 786a09531..921e98647 100644 --- a/website/data/docs-nav-data.json +++ b/website/data/docs-nav-data.json @@ -19,51 +19,6 @@ "path": "intro/usecases/what-is-a-service-mesh" } ] - }, - { - "title": "Consul vs. 
Other Software", - "routes": [ - { - "title": "Overview", - "path": "intro/vs" - }, - { - "title": "Chef, Puppet, etc.", - "path": "intro/vs/chef-puppet" - }, - { - "title": "Nagios", - "path": "intro/vs/nagios" - }, - { - "title": "SkyDNS", - "path": "intro/vs/skydns" - }, - { - "title": "SmartStack", - "path": "intro/vs/smartstack" - }, - { - "title": "Serf", - "path": "intro/vs/serf" - }, - { - "title": "Eureka", - "path": "intro/vs/eureka" - }, - { - "title": "Istio", - "path": "intro/vs/istio" - }, - { - "title": "Envoy and Other Proxies", - "path": "intro/vs/proxies" - }, - { - "title": "Custom Solutions", - "path": "intro/vs/custom" - } - ] } ] }, diff --git a/website/redirects.js b/website/redirects.js index c25cf8ccc..dfd6026ac 100644 --- a/website/redirects.js +++ b/website/redirects.js @@ -291,52 +291,6 @@ module.exports = [ permanent: true, }, { source: '/intro', destination: '/docs/intro', permanent: true }, - { source: '/intro/vs', destination: '/docs/intro/vs', permanent: true }, - { - source: '/intro/vs/chef-puppet', - destination: '/docs/intro/vs/chef-puppet', - permanent: true, - }, - { - source: '/intro/vs/nagios', - destination: '/docs/intro/vs/nagios', - permanent: true, - }, - { - source: '/intro/vs/skydns', - destination: '/docs/intro/vs/skydns', - permanent: true, - }, - { - source: '/intro/vs/smartstack', - destination: '/docs/intro/vs/smartstack', - permanent: true, - }, - { - source: '/intro/vs/serf', - destination: '/docs/intro/vs/serf', - permanent: true, - }, - { - source: '/intro/vs/eureka', - destination: '/docs/intro/vs/eureka', - permanent: true, - }, - { - source: '/intro/vs/istio', - destination: '/docs/intro/vs/istio', - permanent: true, - }, - { - source: '/intro/vs/proxies', - destination: '/docs/intro/vs/proxies', - permanent: true, - }, - { - source: '/intro/vs/custom', - destination: '/docs/intro/vs/custom', - permanent: true, - }, { source: '/docs/k8s/ambassador', destination: