acl: prevent privilege escalation via workload identity
ACL policies can be associated with a job so that the job's Workload Identity can have expanded access to other policy objects, including other variables. Policies set on the variables the job automatically has access to were ignored, but this includes policies with `deny` capabilities. Additionally, when resolving claims for a workload identity without any attached policies, the `ResolveClaims` method returned a `nil` ACL object, which is treated similarly to a management token. While this was safe in Nomad 1.4.x, when the workload identity token was exposed to the task via the `identity` block, this allows a user with `submit-job` capabilities to escalate their privileges. We originally implemented automatic workload access to Variables as a separate code path in the Variables RPC endpoint so that we don't have to generate on-the-fly policies that blow up the ACL policy cache. This is fairly brittle but also the behavior around wildcard paths in policies different from the rest of our ACL polices, which is hard to reason about. Add an `ACLClaim` parameter to the `AllowVariableOperation` method so that we can push all this logic into the `acl` package and the behavior can be consistent. This will allow a `deny` policy to override automatic access (and probably speed up checks of non-automatic variable access).
This commit is contained in:
parent
832bca91a1
commit
1cf28996e7
|
@ -0,0 +1,3 @@
|
|||
```release-note:security
|
||||
variables: Fixed a bug where a workload-associated policy with a deny capability was ignored for the workload's own variables [CVE-2023-1296](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-1296)
|
||||
```
|
|
@ -0,0 +1,3 @@
|
|||
```release-note:security
|
||||
variables: Fixed a bug where a workload identity without any workload-associated policies was treated as a management token [CVE-2023-1299](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-1299)
|
||||
```
|
37
acl/acl.go
37
acl/acl.go
|
@ -358,13 +358,13 @@ func (a *ACL) AllowHostVolume(ns string) bool {
|
|||
return !capabilities.Check(PolicyDeny)
|
||||
}
|
||||
|
||||
func (a *ACL) AllowVariableOperation(ns, path, op string) bool {
|
||||
func (a *ACL) AllowVariableOperation(ns, path, op string, claim *ACLClaim) bool {
|
||||
if a.management {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check for a matching capability set
|
||||
capabilities, ok := a.matchingVariablesCapabilitySet(ns, path)
|
||||
capabilities, ok := a.matchingVariablesCapabilitySet(ns, path, claim)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
@ -372,6 +372,13 @@ func (a *ACL) AllowVariableOperation(ns, path, op string) bool {
|
|||
return capabilities.Check(op)
|
||||
}
|
||||
|
||||
type ACLClaim struct {
|
||||
Namespace string
|
||||
Job string
|
||||
Group string
|
||||
Task string
|
||||
}
|
||||
|
||||
// AllowVariableSearch is a very loose check that the token has *any* access to
|
||||
// a variables path for the namespace, with an expectation that the actual
|
||||
// search result will be filtered by specific paths
|
||||
|
@ -459,16 +466,30 @@ func (a *ACL) matchingHostVolumeCapabilitySet(name string) (capabilitySet, bool)
|
|||
return a.findClosestMatchingGlob(a.wildcardHostVolumes, name)
|
||||
}
|
||||
|
||||
// matchingVariablesCapabilitySet looks for a capabilitySet that matches the namespace and path,
|
||||
// if no concrete definitions are found, then we return the closest matching
|
||||
// glob.
|
||||
var workloadVariablesCapabilitySet = capabilitySet{"read": struct{}{}, "list": struct{}{}}
|
||||
|
||||
// matchingVariablesCapabilitySet looks for a capabilitySet in the following order:
|
||||
// - matching the namespace and path from a policy
|
||||
// - automatic access based on the claim
|
||||
// - closest matching glob
|
||||
//
|
||||
// The closest matching glob is the one that has the smallest character
|
||||
// difference between the namespace and the glob.
|
||||
func (a *ACL) matchingVariablesCapabilitySet(ns, path string) (capabilitySet, bool) {
|
||||
func (a *ACL) matchingVariablesCapabilitySet(ns, path string, claim *ACLClaim) (capabilitySet, bool) {
|
||||
// Check for a concrete matching capability set
|
||||
raw, ok := a.variables.Get([]byte(ns + "\x00" + path))
|
||||
capSet, ok := a.variables.Get([]byte(ns + "\x00" + path))
|
||||
if ok {
|
||||
return raw, true
|
||||
return capSet, true
|
||||
}
|
||||
if claim != nil && ns == claim.Namespace {
|
||||
switch path {
|
||||
case "nomad/jobs",
|
||||
fmt.Sprintf("nomad/jobs/%s", claim.Job),
|
||||
fmt.Sprintf("nomad/jobs/%s/%s", claim.Job, claim.Group),
|
||||
fmt.Sprintf("nomad/jobs/%s/%s/%s", claim.Job, claim.Group, claim.Task):
|
||||
return workloadVariablesCapabilitySet, true
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// We didn't find a concrete match, so lets try and evaluate globs.
|
||||
|
|
|
@ -457,6 +457,7 @@ func TestVariablesMatching(t *testing.T) {
|
|||
ns string
|
||||
path string
|
||||
op string
|
||||
claim *ACLClaim
|
||||
allow bool
|
||||
}{
|
||||
{
|
||||
|
@ -611,6 +612,36 @@ func TestVariablesMatching(t *testing.T) {
|
|||
op: "list",
|
||||
allow: true,
|
||||
},
|
||||
{
|
||||
name: "claim with more specific policy",
|
||||
policy: `namespace "ns" {
|
||||
variables { path "nomad/jobs/example" { capabilities = ["deny"] }}}`,
|
||||
ns: "ns",
|
||||
path: "nomad/jobs/example",
|
||||
op: "read",
|
||||
claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"},
|
||||
allow: false,
|
||||
},
|
||||
{
|
||||
name: "claim with less specific policy",
|
||||
policy: `namespace "ns" {
|
||||
variables { path "nomad/jobs" { capabilities = ["deny"] }}}`,
|
||||
ns: "ns",
|
||||
path: "nomad/jobs/example",
|
||||
op: "read",
|
||||
claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"},
|
||||
allow: true,
|
||||
},
|
||||
{
|
||||
name: "claim with less specific wildcard policy",
|
||||
policy: `namespace "ns" {
|
||||
variables { path "nomad/jobs/*" { capabilities = ["deny"] }}}`,
|
||||
ns: "ns",
|
||||
path: "nomad/jobs/example",
|
||||
op: "read",
|
||||
claim: &ACLClaim{Namespace: "ns", Job: "example", Group: "foo", Task: "bar"},
|
||||
allow: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
|
@ -622,7 +653,8 @@ func TestVariablesMatching(t *testing.T) {
|
|||
|
||||
acl, err := NewACL(false, []*Policy{policy})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.allow, acl.AllowVariableOperation(tc.ns, tc.path, tc.op))
|
||||
allowed := acl.AllowVariableOperation(tc.ns, tc.path, tc.op, tc.claim)
|
||||
require.Equal(t, tc.allow, allowed)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -267,11 +267,9 @@ func (s *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error)
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(policies) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Compile and cache the ACL object
|
||||
// Compile and cache the ACL object. For many claims this will result in an
|
||||
// ACL object with no policies, which can be efficiently cached.
|
||||
aclObj, err := structs.CompileACLObject(s.aclCache, policies)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -748,11 +748,6 @@ func TestResolveClaims(t *testing.T) {
|
|||
JobID: claims.JobID,
|
||||
}
|
||||
|
||||
index++
|
||||
err := store.UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{
|
||||
policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7})
|
||||
must.NoError(t, err)
|
||||
|
||||
aclObj, err := srv.ResolveClaims(claims)
|
||||
must.Nil(t, aclObj)
|
||||
must.EqError(t, err, "allocation does not exist")
|
||||
|
@ -762,11 +757,22 @@ func TestResolveClaims(t *testing.T) {
|
|||
err = store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc})
|
||||
must.NoError(t, err)
|
||||
|
||||
// Resolve claims and check we that the ACL object without policies provides no access
|
||||
aclObj, err = srv.ResolveClaims(claims)
|
||||
must.NoError(t, err)
|
||||
must.NotNil(t, aclObj)
|
||||
must.False(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs))
|
||||
|
||||
// Check that the ACL object looks reasonable
|
||||
// Add the policies
|
||||
index++
|
||||
err = store.UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{
|
||||
policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7})
|
||||
must.NoError(t, err)
|
||||
|
||||
// Re-resolve and check that the resulting ACL looks reasonable
|
||||
aclObj, err = srv.ResolveClaims(claims)
|
||||
must.NoError(t, err)
|
||||
must.NotNil(t, aclObj)
|
||||
must.False(t, aclObj.IsManagement())
|
||||
must.True(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs))
|
||||
must.False(t, aclObj.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs))
|
||||
|
|
|
@ -121,7 +121,7 @@ func svePreApply(sv *Variables, args *structs.VariablesApplyRequest, vd *structs
|
|||
} else if aclObj != nil {
|
||||
hasPerm := func(perm string) bool {
|
||||
return aclObj.AllowVariableOperation(args.Var.Namespace,
|
||||
args.Var.Path, perm)
|
||||
args.Var.Path, perm, nil)
|
||||
}
|
||||
canRead = hasPerm(acl.VariablesCapabilityRead)
|
||||
|
||||
|
@ -526,63 +526,53 @@ func (sv *Variables) authorize(aclObj *acl.ACL, claims *structs.IdentityClaims,
|
|||
// Perform normal ACL validation. If the ACL object is nil, that means we're
|
||||
// working with an identity claim.
|
||||
if aclObj != nil {
|
||||
if !aclObj.AllowVariableOperation(ns, pathOrPrefix, policy) {
|
||||
allowed := aclObj.AllowVariableOperation(ns, pathOrPrefix, policy, nil)
|
||||
if !allowed {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check the workload-associated policies and automatic task access to
|
||||
// variables.
|
||||
if claims != nil {
|
||||
// The workload identity gets access to paths that match its
|
||||
// identity, without having to go thru the ACL system
|
||||
err := sv.authValidatePrefix(claims, ns, pathOrPrefix)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If the workload identity doesn't match the implicit permissions
|
||||
// given to paths, check for its attached ACL policies
|
||||
aclObj, err = sv.srv.ResolveClaims(claims)
|
||||
aclObj, err := sv.srv.ResolveClaims(claims)
|
||||
if err != nil {
|
||||
return err // this only returns an error when the state store has gone wrong
|
||||
return err // returns internal errors only
|
||||
}
|
||||
if aclObj != nil && aclObj.AllowVariableOperation(
|
||||
ns, pathOrPrefix, policy) {
|
||||
if aclObj != nil {
|
||||
group, err := sv.groupForAlloc(claims)
|
||||
if err != nil {
|
||||
// returns ErrPermissionDenied for claims from terminal
|
||||
// allocations, otherwise only internal errors
|
||||
return err
|
||||
}
|
||||
allowed := aclObj.AllowVariableOperation(
|
||||
ns, pathOrPrefix, policy, &acl.ACLClaim{
|
||||
Namespace: claims.Namespace,
|
||||
Job: claims.JobID,
|
||||
Group: group,
|
||||
Task: claims.TaskName,
|
||||
})
|
||||
if allowed {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// authValidatePrefix asserts that the requested path is valid for
|
||||
// this allocation
|
||||
func (sv *Variables) authValidatePrefix(claims *structs.IdentityClaims, ns, pathOrPrefix string) error {
|
||||
|
||||
func (sv *Variables) groupForAlloc(claims *structs.IdentityClaims) (string, error) {
|
||||
store, err := sv.srv.fsm.State().Snapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
alloc, err := store.AllocByID(nil, claims.AllocationID)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
if alloc == nil || alloc.Job == nil {
|
||||
return fmt.Errorf("allocation does not exist")
|
||||
return "", structs.ErrPermissionDenied
|
||||
}
|
||||
if alloc.Job.Namespace != ns {
|
||||
return fmt.Errorf("allocation is in another namespace")
|
||||
}
|
||||
|
||||
parts := strings.Split(pathOrPrefix, "/")
|
||||
expect := []string{"nomad", "jobs", claims.JobID, alloc.TaskGroup, claims.TaskName}
|
||||
if len(parts) > len(expect) {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
for idx, part := range parts {
|
||||
if part != expect[idx] {
|
||||
return structs.ErrPermissionDenied
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return alloc.TaskGroup, nil
|
||||
}
|
||||
|
|
|
@ -86,11 +86,13 @@ func TestVariablesEndpoint_auth(t *testing.T) {
|
|||
must.NoError(t, err)
|
||||
|
||||
policy := mock.ACLPolicy()
|
||||
policy.Rules = `namespace "nondefault-namespace" {
|
||||
policy.Rules = fmt.Sprintf(`namespace "nondefault-namespace" {
|
||||
variables {
|
||||
path "nomad/jobs/*" { capabilities = ["list"] }
|
||||
path "nomad/jobs/%s/web" { capabilities = ["deny"] }
|
||||
path "nomad/jobs/%s" { capabilities = ["write"] }
|
||||
path "other/path" { capabilities = ["read"] }
|
||||
}}`
|
||||
}}`, jobID, jobID)
|
||||
policy.JobACL = &structs.JobACL{
|
||||
Namespace: ns,
|
||||
JobID: jobID,
|
||||
|
@ -144,68 +146,85 @@ func TestVariablesEndpoint_auth(t *testing.T) {
|
|||
expectedErr error
|
||||
}{
|
||||
{
|
||||
name: "valid claim for path with task secret",
|
||||
name: "WI with policy no override can read task secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: fmt.Sprintf("nomad/jobs/%s/web/web", jobID),
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "valid claim for path with group secret",
|
||||
name: "WI with policy no override can list task secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
cap: acl.PolicyList,
|
||||
path: fmt.Sprintf("nomad/jobs/%s/web/web", jobID),
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "WI with policy override denies list group secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyList,
|
||||
path: fmt.Sprintf("nomad/jobs/%s/web", jobID),
|
||||
expectedErr: nil,
|
||||
expectedErr: structs.ErrPermissionDenied,
|
||||
},
|
||||
{
|
||||
name: "valid claim for path with job secret",
|
||||
name: "WI with policy override can write job secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
cap: acl.PolicyWrite,
|
||||
path: fmt.Sprintf("nomad/jobs/%s", jobID),
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "valid claim for path with dispatch job secret",
|
||||
token: idDispatchToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: fmt.Sprintf("nomad/jobs/%s", jobID),
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "valid claim for path with namespace secret",
|
||||
name: "WI with policy override for write-only job secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: fmt.Sprintf("nomad/jobs/%s", jobID),
|
||||
expectedErr: structs.ErrPermissionDenied,
|
||||
},
|
||||
{
|
||||
name: "WI with policy no override can list namespace secret",
|
||||
token: idToken,
|
||||
cap: acl.PolicyList,
|
||||
path: "nomad/jobs",
|
||||
expectedErr: nil,
|
||||
},
|
||||
|
||||
{
|
||||
name: "valid claim for job-attached policy",
|
||||
name: "WI with policy can read other path",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: "other/path",
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "valid claim for job-attached policy path denied",
|
||||
name: "WI with policy cannot read other path not explicitly allowed",
|
||||
token: idToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: "other/not-allowed",
|
||||
expectedErr: structs.ErrPermissionDenied,
|
||||
},
|
||||
{
|
||||
name: "valid claim for job-attached policy capability denied",
|
||||
name: "WI with policy has no write cap for other path",
|
||||
token: idToken,
|
||||
cap: acl.PolicyWrite,
|
||||
path: "other/path",
|
||||
expectedErr: structs.ErrPermissionDenied,
|
||||
},
|
||||
{
|
||||
name: "valid claim for job-attached policy capability with cross-job access",
|
||||
name: "WI with policy can read cross-job path",
|
||||
token: idToken,
|
||||
cap: acl.PolicyList,
|
||||
path: "nomad/jobs/some-other",
|
||||
expectedErr: nil,
|
||||
},
|
||||
|
||||
{
|
||||
name: "WI for dispatch job can read parent secret",
|
||||
token: idDispatchToken,
|
||||
cap: acl.PolicyRead,
|
||||
path: fmt.Sprintf("nomad/jobs/%s", jobID),
|
||||
expectedErr: nil,
|
||||
},
|
||||
|
||||
{
|
||||
name: "valid claim with no permissions denied by path",
|
||||
token: noPermissionsToken,
|
||||
|
@ -626,6 +645,31 @@ func TestVariablesEndpoint_ListFiltering(t *testing.T) {
|
|||
}
|
||||
must.Eq(t, expect, found)
|
||||
|
||||
// Associate a policy with the identity's job to deny partial access.
|
||||
policy := &structs.ACLPolicy{
|
||||
Name: "policy-for-identity",
|
||||
Rules: mock.NamespacePolicyWithVariables(ns, "read", []string{},
|
||||
map[string][]string{"nomad/jobs/job1/group": []string{"deny"}}),
|
||||
JobACL: &structs.JobACL{
|
||||
Namespace: ns,
|
||||
JobID: "job1",
|
||||
},
|
||||
}
|
||||
policy.SetHash()
|
||||
must.NoError(t, store.UpsertACLPolicies(structs.MsgTypeTestSetup, 16,
|
||||
[]*structs.ACLPolicy{policy}))
|
||||
|
||||
must.NoError(t, msgpackrpc.CallWithCodec(codec, "Variables.List", req, &resp))
|
||||
found = []string{}
|
||||
for _, variable := range resp.Data {
|
||||
found = append(found, variable.Path)
|
||||
}
|
||||
expect = []string{
|
||||
"nomad/jobs/job1",
|
||||
"nomad/jobs/job1/group/web",
|
||||
}
|
||||
must.Eq(t, expect, found)
|
||||
|
||||
}
|
||||
|
||||
func TestVariablesEndpoint_ComplexACLPolicies(t *testing.T) {
|
||||
|
|
|
@ -130,7 +130,7 @@ namespace "default" {
|
|||
```
|
||||
|
||||
You can provide access to additional variables by creating policies associated
|
||||
with the task's [workload identity]. For example, to give the task above access
|
||||
with the task's [workload identity][]. For example, to give the task above access
|
||||
to all variables in the "shared" namespace, you can create the following policy
|
||||
file:
|
||||
|
||||
|
@ -152,6 +152,37 @@ nomad acl policy apply \
|
|||
redis-policy ./policy.hcl
|
||||
```
|
||||
|
||||
Priority of policies and automatic task access to variables is similar to the
|
||||
[ACL policy namespace rules][]. The most specific rule for a path applies, so a
|
||||
rule for an exact path in a workload-attached policy overrides the automatic
|
||||
task access to variables, but a wildcard rule does not.
|
||||
|
||||
As an example, consider the job `example` in the namespace `prod`, with a group
|
||||
`web` and a task named `httpd` with the following policy applied:
|
||||
|
||||
```hcl
|
||||
namespace "*" {
|
||||
variables {
|
||||
path "nomad/jobs" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
|
||||
path "nomad/jobs/*" {
|
||||
capabilities = ["deny"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The task will have read/list access to its own variables `nomad/jobs/example`,
|
||||
`nomad/jobs/example/web`, and `nomad/jobs/example/web/httpd` in the namespace
|
||||
`prod`, because those are more specific than the wildcard rule that denies
|
||||
access. The task will have list access to `nomad/jobs` in any namespace, because
|
||||
that path is more specific than the automatic task access to the `nomad/jobs`
|
||||
variable. And the task will not have access to `nomad/jobs/example` (or below)
|
||||
in namespaces other than `prod`, because the automatic access rule does not
|
||||
apply.
|
||||
|
||||
See [Workload Associated ACL Policies] for more details.
|
||||
|
||||
[HashiCorp Consul]: https://www.consul.io/
|
||||
|
@ -161,3 +192,4 @@ See [Workload Associated ACL Policies] for more details.
|
|||
[`template`]: /nomad/docs/job-specification/template
|
||||
[workload identity]: /nomad/docs/concepts/workload-identity
|
||||
[Workload Associated ACL Policies]: /nomad/docs/concepts/workload-identity#workload-associated-acl-policies
|
||||
[ACL policy namespace rules]: /nomad/docs/other-specifications/acl-policy#namespace-rules
|
||||
|
|
Loading…
Reference in New Issue