From 1cf28996e717feb0ca1fa61e583a2e8961ead0b8 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 3 Mar 2023 11:41:59 -0500 Subject: [PATCH] acl: prevent privilege escalation via workload identity ACL policies can be associated with a job so that the job's Workload Identity can have expanded access to other policy objects, including other variables. Policies set on the variables the job automatically has access to were ignored, but this includes policies with `deny` capabilities. Additionally, when resolving claims for a workload identity without any attached policies, the `ResolveClaims` method returned a `nil` ACL object, which is treated similarly to a management token. While this was safe in Nomad 1.4.x, when the workload identity token was exposed to the task via the `identity` block, this allows a user with `submit-job` capabilities to escalate their privileges. We originally implemented automatic workload access to Variables as a separate code path in the Variables RPC endpoint so that we don't have to generate on-the-fly policies that blow up the ACL policy cache. This is fairly brittle but also the behavior around wildcard paths in policies different from the rest of our ACL polices, which is hard to reason about. Add an `ACLClaim` parameter to the `AllowVariableOperation` method so that we can push all this logic into the `acl` package and the behavior can be consistent. This will allow a `deny` policy to override automatic access (and probably speed up checks of non-automatic variable access). --- .changelog/16349.txt | 3 + .changelog/16419.txt | 3 + acl/acl.go | 37 +++++++-- acl/acl_test.go | 34 ++++++++- nomad/acl.go | 6 +- nomad/acl_test.go | 18 +++-- nomad/variables_endpoint.go | 68 +++++++---------- nomad/variables_endpoint_test.go | 84 ++++++++++++++++----- website/content/docs/concepts/variables.mdx | 34 ++++++++- 9 files changed, 208 insertions(+), 79 deletions(-) create mode 100644 .changelog/16349.txt create mode 100644 .changelog/16419.txt diff --git a/.changelog/16349.txt b/.changelog/16349.txt new file mode 100644 index 000000000..340168bfc --- /dev/null +++ b/.changelog/16349.txt @@ -0,0 +1,3 @@ +```release-note:security +variables: Fixed a bug where a workload-associated policy with a deny capability was ignored for the workload's own variables [CVE-2023-1296](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-1296) +``` diff --git a/.changelog/16419.txt b/.changelog/16419.txt new file mode 100644 index 000000000..2453fc5bc --- /dev/null +++ b/.changelog/16419.txt @@ -0,0 +1,3 @@ +```release-note:security +variables: Fixed a bug where a workload identity without any workload-associated policies was treated as a management token [CVE-2023-1299](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-1299) +``` diff --git a/acl/acl.go b/acl/acl.go index 41a793fe1..7725278c1 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -358,13 +358,13 @@ func (a *ACL) AllowHostVolume(ns string) bool { return !capabilities.Check(PolicyDeny) } -func (a *ACL) AllowVariableOperation(ns, path, op string) bool { +func (a *ACL) AllowVariableOperation(ns, path, op string, claim *ACLClaim) bool { if a.management { return true } // Check for a matching capability set - capabilities, ok := a.matchingVariablesCapabilitySet(ns, path) + capabilities, ok := a.matchingVariablesCapabilitySet(ns, path, claim) if !ok { return false } @@ -372,6 +372,13 @@ func (a *ACL) AllowVariableOperation(ns, path, op string) bool { return capabilities.Check(op) } +type ACLClaim struct { + Namespace string + Job string + Group string + Task string +} + // AllowVariableSearch is a very loose check that the token has *any* access to // a variables path for the namespace, with an expectation that the actual // search result will be filtered by specific paths @@ -459,16 +466,30 @@ func (a *ACL) matchingHostVolumeCapabilitySet(name string) (capabilitySet, bool) return a.findClosestMatchingGlob(a.wildcardHostVolumes, name) } -// matchingVariablesCapabilitySet looks for a capabilitySet that matches the namespace and path, -// if no concrete definitions are found, then we return the closest matching -// glob. +var workloadVariablesCapabilitySet = capabilitySet{"read": struct{}{}, "list": struct{}{}} + +// matchingVariablesCapabilitySet looks for a capabilitySet in the following order: +// - matching the namespace and path from a policy +// - automatic access based on the claim +// - closest matching glob +// // The closest matching glob is the one that has the smallest character // difference between the namespace and the glob. -func (a *ACL) matchingVariablesCapabilitySet(ns, path string) (capabilitySet, bool) { +func (a *ACL) matchingVariablesCapabilitySet(ns, path string, claim *ACLClaim) (capabilitySet, bool) { // Check for a concrete matching capability set - raw, ok := a.variables.Get([]byte(ns + "\x00" + path)) + capSet, ok := a.variables.Get([]byte(ns + "\x00" + path)) if ok { - return raw, true + return capSet, true + } + if claim != nil && ns == claim.Namespace { + switch path { + case "nomad/jobs", + fmt.Sprintf("nomad/jobs/%s", claim.Job), + fmt.Sprintf("nomad/jobs/%s/%s", claim.Job, claim.Group), + fmt.Sprintf("nomad/jobs/%s/%s/%s", claim.Job, claim.Group, claim.Task): + return workloadVariablesCapabilitySet, true + default: + } } // We didn't find a concrete match, so lets try and evaluate globs. diff --git a/acl/acl_test.go b/acl/acl_test.go index 3b74431db..d60dd5b5c 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -457,6 +457,7 @@ func TestVariablesMatching(t *testing.T) { ns string path string op string + claim *ACLClaim allow bool }{ { @@ -611,6 +612,36 @@ func TestVariablesMatching(t *testing.T) { op: "list", allow: true, }, + { + name: "claim with more specific policy", + policy: `namespace "ns" { + variables { path "nomad/jobs/example" { capabilities = ["deny"] }}}`, + ns: "ns", + path: "nomad/jobs/example", + op: "read", + claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"}, + allow: false, + }, + { + name: "claim with less specific policy", + policy: `namespace "ns" { + variables { path "nomad/jobs" { capabilities = ["deny"] }}}`, + ns: "ns", + path: "nomad/jobs/example", + op: "read", + claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"}, + allow: true, + }, + { + name: "claim with less specific wildcard policy", + policy: `namespace "ns" { + variables { path "nomad/jobs/*" { capabilities = ["deny"] }}}`, + ns: "ns", + path: "nomad/jobs/example", + op: "read", + claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"}, + allow: true, + }, } for _, tc := range tests { @@ -622,7 +653,8 @@ func TestVariablesMatching(t *testing.T) { acl, err := NewACL(false, []*Policy{policy}) require.NoError(t, err) - require.Equal(t, tc.allow, acl.AllowVariableOperation(tc.ns, tc.path, tc.op)) + allowed := acl.AllowVariableOperation(tc.ns, tc.path, tc.op, tc.claim) + require.Equal(t, tc.allow, allowed) }) } diff --git a/nomad/acl.go b/nomad/acl.go index 4327f7403..f2a7a5058 100644 --- a/nomad/acl.go +++ b/nomad/acl.go @@ -267,11 +267,9 @@ func (s *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) if err != nil { return nil, err } - if len(policies) == 0 { - return nil, nil - } - // Compile and cache the ACL object + // Compile and cache the ACL object. For many claims this will result in an + // ACL object with no policies, which can be efficiently cached. aclObj, err := structs.CompileACLObject(s.aclCache, policies) if err != nil { return nil, err diff --git a/nomad/acl_test.go b/nomad/acl_test.go index 6b4bfdfff..a3c4e3d64 100644 --- a/nomad/acl_test.go +++ b/nomad/acl_test.go @@ -748,11 +748,6 @@ func TestResolveClaims(t *testing.T) { JobID: claims.JobID, } - index++ - err := store.UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{ - policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7}) - must.NoError(t, err) - aclObj, err := srv.ResolveClaims(claims) must.Nil(t, aclObj) must.EqError(t, err, "allocation does not exist") @@ -762,11 +757,22 @@ func TestResolveClaims(t *testing.T) { err = store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc}) must.NoError(t, err) + // Resolve claims and check we that the ACL object without policies provides no access aclObj, err = srv.ResolveClaims(claims) must.NoError(t, err) must.NotNil(t, aclObj) + must.False(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)) - // Check that the ACL object looks reasonable + // Add the policies + index++ + err = store.UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{ + policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7}) + must.NoError(t, err) + + // Re-resolve and check that the resulting ACL looks reasonable + aclObj, err = srv.ResolveClaims(claims) + must.NoError(t, err) + must.NotNil(t, aclObj) must.False(t, aclObj.IsManagement()) must.True(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)) must.False(t, aclObj.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs)) diff --git a/nomad/variables_endpoint.go b/nomad/variables_endpoint.go index be43d0677..193419408 100644 --- a/nomad/variables_endpoint.go +++ b/nomad/variables_endpoint.go @@ -121,7 +121,7 @@ func svePreApply(sv *Variables, args *structs.VariablesApplyRequest, vd *structs } else if aclObj != nil { hasPerm := func(perm string) bool { return aclObj.AllowVariableOperation(args.Var.Namespace, - args.Var.Path, perm) + args.Var.Path, perm, nil) } canRead = hasPerm(acl.VariablesCapabilityRead) @@ -526,63 +526,53 @@ func (sv *Variables) authorize(aclObj *acl.ACL, claims *structs.IdentityClaims, // Perform normal ACL validation. If the ACL object is nil, that means we're // working with an identity claim. if aclObj != nil { - if !aclObj.AllowVariableOperation(ns, pathOrPrefix, policy) { + allowed := aclObj.AllowVariableOperation(ns, pathOrPrefix, policy, nil) + if !allowed { return structs.ErrPermissionDenied } return nil } + // Check the workload-associated policies and automatic task access to + // variables. if claims != nil { - // The workload identity gets access to paths that match its - // identity, without having to go thru the ACL system - err := sv.authValidatePrefix(claims, ns, pathOrPrefix) - if err == nil { - return nil - } - - // If the workload identity doesn't match the implicit permissions - // given to paths, check for its attached ACL policies - aclObj, err = sv.srv.ResolveClaims(claims) + aclObj, err := sv.srv.ResolveClaims(claims) if err != nil { - return err // this only returns an error when the state store has gone wrong + return err // returns internal errors only } - if aclObj != nil && aclObj.AllowVariableOperation( - ns, pathOrPrefix, policy) { - return nil + if aclObj != nil { + group, err := sv.groupForAlloc(claims) + if err != nil { + // returns ErrPermissionDenied for claims from terminal + // allocations, otherwise only internal errors + return err + } + allowed := aclObj.AllowVariableOperation( + ns, pathOrPrefix, policy, &acl.ACLClaim{ + Namespace: claims.Namespace, + Job: claims.JobID, + Group: group, + Task: claims.TaskName, + }) + if allowed { + return nil + } } } return structs.ErrPermissionDenied } -// authValidatePrefix asserts that the requested path is valid for -// this allocation -func (sv *Variables) authValidatePrefix(claims *structs.IdentityClaims, ns, pathOrPrefix string) error { - +func (sv *Variables) groupForAlloc(claims *structs.IdentityClaims) (string, error) { store, err := sv.srv.fsm.State().Snapshot() if err != nil { - return err + return "", err } alloc, err := store.AllocByID(nil, claims.AllocationID) if err != nil { - return err + return "", err } if alloc == nil || alloc.Job == nil { - return fmt.Errorf("allocation does not exist") + return "", structs.ErrPermissionDenied } - if alloc.Job.Namespace != ns { - return fmt.Errorf("allocation is in another namespace") - } - - parts := strings.Split(pathOrPrefix, "/") - expect := []string{"nomad", "jobs", claims.JobID, alloc.TaskGroup, claims.TaskName} - if len(parts) > len(expect) { - return structs.ErrPermissionDenied - } - - for idx, part := range parts { - if part != expect[idx] { - return structs.ErrPermissionDenied - } - } - return nil + return alloc.TaskGroup, nil } diff --git a/nomad/variables_endpoint_test.go b/nomad/variables_endpoint_test.go index 1171797f1..15d45d822 100644 --- a/nomad/variables_endpoint_test.go +++ b/nomad/variables_endpoint_test.go @@ -86,11 +86,13 @@ func TestVariablesEndpoint_auth(t *testing.T) { must.NoError(t, err) policy := mock.ACLPolicy() - policy.Rules = `namespace "nondefault-namespace" { + policy.Rules = fmt.Sprintf(`namespace "nondefault-namespace" { variables { path "nomad/jobs/*" { capabilities = ["list"] } + path "nomad/jobs/%s/web" { capabilities = ["deny"] } + path "nomad/jobs/%s" { capabilities = ["write"] } path "other/path" { capabilities = ["read"] } - }}` + }}`, jobID, jobID) policy.JobACL = &structs.JobACL{ Namespace: ns, JobID: jobID, @@ -144,68 +146,85 @@ func TestVariablesEndpoint_auth(t *testing.T) { expectedErr error }{ { - name: "valid claim for path with task secret", + name: "WI with policy no override can read task secret", token: idToken, cap: acl.PolicyRead, path: fmt.Sprintf("nomad/jobs/%s/web/web", jobID), expectedErr: nil, }, { - name: "valid claim for path with group secret", + name: "WI with policy no override can list task secret", token: idToken, - cap: acl.PolicyRead, + cap: acl.PolicyList, + path: fmt.Sprintf("nomad/jobs/%s/web/web", jobID), + expectedErr: nil, + }, + { + name: "WI with policy override denies list group secret", + token: idToken, + cap: acl.PolicyList, path: fmt.Sprintf("nomad/jobs/%s/web", jobID), - expectedErr: nil, + expectedErr: structs.ErrPermissionDenied, }, { - name: "valid claim for path with job secret", + name: "WI with policy override can write job secret", token: idToken, - cap: acl.PolicyRead, + cap: acl.PolicyWrite, path: fmt.Sprintf("nomad/jobs/%s", jobID), expectedErr: nil, }, { - name: "valid claim for path with dispatch job secret", - token: idDispatchToken, - cap: acl.PolicyRead, - path: fmt.Sprintf("nomad/jobs/%s", jobID), - expectedErr: nil, - }, - { - name: "valid claim for path with namespace secret", + name: "WI with policy override for write-only job secret", token: idToken, cap: acl.PolicyRead, + path: fmt.Sprintf("nomad/jobs/%s", jobID), + expectedErr: structs.ErrPermissionDenied, + }, + { + name: "WI with policy no override can list namespace secret", + token: idToken, + cap: acl.PolicyList, path: "nomad/jobs", expectedErr: nil, }, + { - name: "valid claim for job-attached policy", + name: "WI with policy can read other path", token: idToken, cap: acl.PolicyRead, path: "other/path", expectedErr: nil, }, { - name: "valid claim for job-attached policy path denied", + name: "WI with policy cannot read other path not explicitly allowed", token: idToken, cap: acl.PolicyRead, path: "other/not-allowed", expectedErr: structs.ErrPermissionDenied, }, { - name: "valid claim for job-attached policy capability denied", + name: "WI with policy has no write cap for other path", token: idToken, cap: acl.PolicyWrite, path: "other/path", expectedErr: structs.ErrPermissionDenied, }, { - name: "valid claim for job-attached policy capability with cross-job access", + name: "WI with policy can read cross-job path", token: idToken, cap: acl.PolicyList, path: "nomad/jobs/some-other", expectedErr: nil, }, + + { + name: "WI for dispatch job can read parent secret", + token: idDispatchToken, + cap: acl.PolicyRead, + path: fmt.Sprintf("nomad/jobs/%s", jobID), + expectedErr: nil, + }, + { name: "valid claim with no permissions denied by path", token: noPermissionsToken, @@ -626,6 +645,31 @@ func TestVariablesEndpoint_ListFiltering(t *testing.T) { } must.Eq(t, expect, found) + // Associate a policy with the identity's job to deny partial access. + policy := &structs.ACLPolicy{ + Name: "policy-for-identity", + Rules: mock.NamespacePolicyWithVariables(ns, "read", []string{}, + map[string][]string{"nomad/jobs/job1/group": []string{"deny"}}), + JobACL: &structs.JobACL{ + Namespace: ns, + JobID: "job1", + }, + } + policy.SetHash() + must.NoError(t, store.UpsertACLPolicies(structs.MsgTypeTestSetup, 16, + []*structs.ACLPolicy{policy})) + + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Variables.List", req, &resp)) + found = []string{} + for _, variable := range resp.Data { + found = append(found, variable.Path) + } + expect = []string{ + "nomad/jobs/job1", + "nomad/jobs/job1/group/web", + } + must.Eq(t, expect, found) + } func TestVariablesEndpoint_ComplexACLPolicies(t *testing.T) { diff --git a/website/content/docs/concepts/variables.mdx b/website/content/docs/concepts/variables.mdx index 5ab969f7c..2e256dc0c 100644 --- a/website/content/docs/concepts/variables.mdx +++ b/website/content/docs/concepts/variables.mdx @@ -130,7 +130,7 @@ namespace "default" { ``` You can provide access to additional variables by creating policies associated -with the task's [workload identity]. For example, to give the task above access +with the task's [workload identity][]. For example, to give the task above access to all variables in the "shared" namespace, you can create the following policy file: @@ -152,6 +152,37 @@ nomad acl policy apply \ redis-policy ./policy.hcl ``` +Priority of policies and automatic task access to variables is similar to the +[ACL policy namespace rules][]. The most specific rule for a path applies, so a +rule for an exact path in a workload-attached policy overrides the automatic +task access to variables, but a wildcard rule does not. + +As an example, consider the job `example` in the namespace `prod`, with a group +`web` and a task named `httpd` with the following policy applied: + +```hcl +namespace "*" { + variables { + path "nomad/jobs" { + capabilities = ["list"] + } + + path "nomad/jobs/*" { + capabilities = ["deny"] + } + } +} +``` + +The task will have read/list access to its own variables `nomad/jobs/example`, +`nomad/jobs/example/web`, and `nomad/jobs/example/web/httpd` in the namespace +`prod`, because those are more specific than the wildcard rule that denies +access. The task will have list access to `nomad/jobs` in any namespace, because +that path is more specific than the automatic task access to the `nomad/jobs` +variable. And the task will not have access to `nomad/jobs/example` (or below) +in namespaces other than `prod`, because the automatic access rule does not +apply. + See [Workload Associated ACL Policies] for more details. [HashiCorp Consul]: https://www.consul.io/ @@ -161,3 +192,4 @@ See [Workload Associated ACL Policies] for more details. [`template`]: /nomad/docs/job-specification/template [workload identity]: /nomad/docs/concepts/workload-identity [Workload Associated ACL Policies]: /nomad/docs/concepts/workload-identity#workload-associated-acl-policies +[ACL policy namespace rules]: /nomad/docs/other-specifications/acl-policy#namespace-rules