workload identity (#13223)

In order to support implicit ACL policies for tasks to get their own
secrets, each task would need to have its own ACL token. This would
add extra raft overhead as well as new garbage collection jobs for
cleaning up task-specific ACL tokens. Instead, Nomad will create a
workload Identity Claim for each task.

An Identity Claim is a JSON Web Token (JWT) signed by the server’s
private key and attached to an Allocation at the time a plan is
applied. The encoded JWT can be submitted as the X-Nomad-Token header
to replace ACL token secret IDs for the RPCs that support identity
claims.

Whenever a key is is added to a server’s keyring, it will use the key
as the seed for a Ed25519 public-private private keypair. That keypair
will be used for signing the JWT and for verifying the JWT.

This implementation is a ruthlessly minimal approach to support the
secure variables feature. When a JWT is verified, the allocation ID
will be checked against the Nomad state store, and non-existent or
terminal allocation IDs will cause the validation to be rejected. This
is sufficient to support the secure variables feature at launch
without requiring implementation of a background process to renew
soon-to-expire tokens.
This commit is contained in:
Tim Gross 2022-06-10 09:41:54 -04:00
parent e79fea2b4e
commit bfcbc00f4e
21 changed files with 681 additions and 41 deletions

View File

@ -36,6 +36,15 @@ rules:
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
# Pattern used by endpoints that support both normal ACLs and
# workload identity
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $T.handleMixedAuthEndpoint(...)
...
# Pattern used by some Node endpoints.
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {

View File

@ -56,6 +56,9 @@ type TaskPrestartRequest struct {
// Vault token may optionally be set if a Vault token is available
VaultToken string
// NomadToken token may optionally be set if a Nomad token is available
NomadToken string
// TaskDir contains the task's directory tree on the host
TaskDir *allocdir.TaskDir
@ -153,6 +156,8 @@ type TaskExitedHook interface {
type TaskUpdateRequest struct {
VaultToken string
NomadToken string
// Alloc is the current version of the allocation (may have been
// updated since the hook was created)
Alloc *structs.Allocation

View File

@ -0,0 +1,50 @@
package taskrunner
import (
"context"
"sync"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
)
// identityHook sets the task runner's Nomad workload identity token
// based on the signed identity stored on the Allocation
type identityHook struct {
tr *TaskRunner
logger log.Logger
taskName string
lock sync.Mutex
}
func newIdentityHook(tr *TaskRunner, logger log.Logger) *identityHook {
h := &identityHook{
tr: tr,
taskName: tr.taskName,
}
h.logger = logger.Named(h.Name())
return h
}
func (*identityHook) Name() string {
return "identity"
}
func (h *identityHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
h.lock.Lock()
defer h.lock.Unlock()
token := h.tr.alloc.SignedIdentities[h.taskName]
h.tr.setNomadToken(token)
return nil
}
func (h *identityHook) Update(_ context.Context, req *interfaces.TaskUpdateRequest, _ *interfaces.TaskUpdateResponse) error {
h.lock.Lock()
defer h.lock.Unlock()
token := h.tr.alloc.SignedIdentities[h.taskName]
h.tr.setNomadToken(token)
return nil
}

View File

@ -187,6 +187,11 @@ type TaskRunner struct {
vaultToken string
vaultTokenLock sync.Mutex
// nomadToken is the current Nomad workload identity token. It
// should be accessed with the getter.
nomadToken string
nomadTokenLock sync.Mutex
// baseLabels are used when emitting tagged metrics. All task runner metrics
// will have these tags, and optionally more.
baseLabels []metrics.Label

View File

@ -76,6 +76,18 @@ func (tr *TaskRunner) setVaultToken(token string) {
tr.envBuilder.SetVaultToken(token, ns, tr.task.Vault.Env)
}
func (tr *TaskRunner) getNomadToken() string {
tr.nomadTokenLock.Lock()
defer tr.nomadTokenLock.Unlock()
return tr.nomadToken
}
func (tr *TaskRunner) setNomadToken(token string) {
tr.nomadTokenLock.Lock()
defer tr.nomadTokenLock.Unlock()
tr.nomadToken = token
}
// getDriverHandle returns a driver handle.
func (tr *TaskRunner) getDriverHandle() *DriverHandle {
tr.handleLock.Lock()

View File

@ -61,6 +61,7 @@ func (tr *TaskRunner) initHooks() {
tr.runnerHooks = []interfaces.TaskHook{
newValidateHook(tr.clientConfig, hookLogger),
newTaskDirHook(tr, hookLogger),
newIdentityHook(tr, hookLogger),
newLogMonHook(tr, hookLogger),
newDispatchHook(alloc, hookLogger),
newVolumeHook(tr, hookLogger),
@ -243,6 +244,7 @@ func (tr *TaskRunner) prestart() error {
}
req.VaultToken = tr.getVaultToken()
req.NomadToken = tr.getNomadToken()
// Time the prestart hook
var start time.Time

View File

@ -105,6 +105,9 @@ type TaskTemplateManagerConfig struct {
// NomadNamespace is the Nomad namespace for the task
NomadNamespace string
// NomadToken is the Nomad token or identity claim for the task
NomadToken string
}
// Validate validates the configuration.
@ -813,9 +816,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig,
// Set up Nomad
conf.Nomad.Namespace = &config.NomadNamespace
conf.Nomad.Transport.CustomDialer = cc.TemplateDialer
// Use the Node's SecretID to authenticate Nomad template function calls.
conf.Nomad.Token = &cc.Node.SecretID
conf.Nomad.Token = &config.NomadToken
conf.Finalize()
return conf, nil

View File

@ -63,6 +63,9 @@ type templateHook struct {
// vaultNamespace is the current Vault namespace
vaultNamespace string
// nomadToken is the current Nomad token
nomadToken string
// taskDir is the task directory
taskDir string
}
@ -91,6 +94,7 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar
// Store the current Vault token and the task directory
h.taskDir = req.TaskDir.Dir
h.vaultToken = req.VaultToken
h.nomadToken = req.NomadToken
// Set vault namespace if specified
if req.Task.Vault != nil {
@ -126,6 +130,7 @@ func (h *templateHook) newManager() (unblock chan struct{}, err error) {
EnvBuilder: h.config.envBuilder,
MaxTemplateEventRate: template.DefaultMaxTemplateEventRate,
NomadNamespace: h.config.nomadNamespace,
NomadToken: h.nomadToken,
})
if err != nil {
h.logger.Error("failed to create template manager", "error", err)
@ -158,11 +163,12 @@ func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateReq
return nil
}
// Check if the Vault token has changed
if req.VaultToken == h.vaultToken {
// Check if either the Nomad or Vault tokens have changed
if req.VaultToken == h.vaultToken && req.NomadToken == h.nomadToken {
return nil
} else {
h.vaultToken = req.VaultToken
h.nomadToken = req.NomadToken
}
// Shutdown the old template

2
go.mod
View File

@ -193,7 +193,7 @@ require (
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gojuno/minimock/v3 v3.0.6 // indirect
github.com/golang-jwt/jwt/v4 v4.0.0 // indirect
github.com/golang-jwt/jwt/v4 v4.4.1
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/btree v1.0.0 // indirect
github.com/google/go-querystring v0.0.0-20170111101155-53e6ce116135 // indirect

3
go.sum
View File

@ -527,8 +527,9 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69
github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI=
github.com/gojuno/minimock/v3 v3.0.6 h1:YqHcVR10x2ZvswPK8Ix5yk+hMpspdQ3ckSpkOzyF85I=
github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY=
github.com/golang-jwt/jwt/v4 v4.0.0 h1:RAqyYixv1p7uEnocuy8P1nru5wprCh/MH2BIlW5z5/o=
github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-jwt/jwt/v4 v4.4.1 h1:pC5DB52sCeK48Wlb9oPcdhnjkz1TKt1D/P7WKJ0kUcQ=
github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=

View File

@ -35,6 +35,10 @@ func (s *Server) ResolveToken(secretID string) (*acl.ACL, error) {
return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID)
}
func (s *Server) ResolveClaim(token string) (*structs.IdentityClaims, error) {
return s.encrypter.VerifyClaim(token)
}
// resolveTokenFromSnapshotCache is used to resolve an ACL object from a snapshot of state,
// using a cache to avoid parsing and ACL construction when possible. It is split from resolveToken
// to simplify testing.

View File

@ -5,6 +5,7 @@ import (
"context"
"crypto/aes"
"crypto/cipher"
"crypto/ed25519"
"encoding/base64"
"encoding/json"
"fmt"
@ -15,6 +16,7 @@ import (
"sync"
"time"
jwt "github.com/golang-jwt/jwt/v4"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-msgpack/codec"
"golang.org/x/time/rate"
@ -27,15 +29,22 @@ const nomadKeystoreExtension = ".nks.json"
// Encrypter is the keyring for secure variables.
type Encrypter struct {
lock sync.RWMutex
keys map[string]*structs.RootKey // map of key IDs to key material
ciphers map[string]cipher.AEAD // map of key IDs to ciphers
srv *Server
keystorePath string
keyring map[string]*keyset
lock sync.RWMutex
}
type keyset struct {
rootKey *structs.RootKey
cipher cipher.AEAD
privateKey ed25519.PrivateKey
}
// NewEncrypter loads or creates a new local keystore and returns an
// encryption keyring with the keys it finds.
func NewEncrypter(keystorePath string) (*Encrypter, error) {
func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) {
err := os.MkdirAll(keystorePath, 0700)
if err != nil {
return nil, err
@ -44,14 +53,14 @@ func NewEncrypter(keystorePath string) (*Encrypter, error) {
if err != nil {
return nil, err
}
encrypter.srv = srv
return encrypter, nil
}
func encrypterFromKeystore(keystoreDirectory string) (*Encrypter, error) {
encrypter := &Encrypter{
ciphers: make(map[string]cipher.AEAD),
keys: make(map[string]*structs.RootKey),
keyring: make(map[string]*keyset),
keystorePath: keystoreDirectory,
}
@ -107,6 +116,62 @@ func (e *Encrypter) Encrypt(unencryptedData []byte, keyID string) []byte {
return unencryptedData
}
// keyIDHeader is the JWT header for the Nomad Key ID used to sign the
// claim. This name matches the common industry practice for this
// header name.
const keyIDHeader = "kid"
// SignClaims signs the identity claim for the task and returns an
// encoded JWT with both the claim and its signature
func (e *Encrypter) SignClaims(claim *structs.IdentityClaims) (string, error) {
keyset, err := e.activeKeySet()
if err != nil {
return "", err
}
token := jwt.NewWithClaims(&jwt.SigningMethodEd25519{}, claim)
token.Header[keyIDHeader] = keyset.rootKey.Meta.KeyID
tokenString, err := token.SignedString(keyset.privateKey)
if err != nil {
return "", err
}
return tokenString, nil
}
// VerifyClaim accepts a previously-signed encoded claim and validates
// it before returning the claim
func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) {
token, err := jwt.ParseWithClaims(tokenString, &structs.IdentityClaims{}, func(token *jwt.Token) (interface{}, error) {
if _, ok := token.Method.(*jwt.SigningMethodEd25519); !ok {
return nil, fmt.Errorf("unexpected signing method: %v", token.Method.Alg())
}
raw := token.Header[keyIDHeader]
if raw == nil {
return nil, fmt.Errorf("missing key ID header")
}
keyID := raw.(string)
keyset, err := e.keysetByID(keyID)
if err != nil {
return nil, err
}
return keyset.privateKey.Public(), nil
})
if err != nil {
return nil, fmt.Errorf("failed to verify token: %v", err)
}
claims, ok := token.Claims.(*structs.IdentityClaims)
if !ok || !token.Valid {
return nil, fmt.Errorf("failed to verify token: invalid token")
}
return claims, nil
}
// Decrypt takes an encrypted buffer and then root key ID. It extracts
// the nonce, decrypts the content, and returns the cleartext data.
func (e *Encrypter) Decrypt(encryptedData []byte, keyID string) ([]byte, error) {
@ -150,22 +215,46 @@ func (e *Encrypter) addCipher(rootKey *structs.RootKey) error {
return fmt.Errorf("invalid algorithm %s", rootKey.Meta.Algorithm)
}
privateKey := ed25519.NewKeyFromSeed(rootKey.Key)
e.lock.Lock()
defer e.lock.Unlock()
e.ciphers[rootKey.Meta.KeyID] = aead
e.keys[rootKey.Meta.KeyID] = rootKey
e.keyring[rootKey.Meta.KeyID] = &keyset{
rootKey: rootKey,
cipher: aead,
privateKey: privateKey,
}
return nil
}
// GetKey retrieves the key material by ID from the keyring
func (e *Encrypter) GetKey(keyID string) ([]byte, error) {
keyset, err := e.keysetByID(keyID)
if err != nil {
return nil, err
}
return keyset.rootKey.Key, nil
}
func (e *Encrypter) activeKeySet() (*keyset, error) {
store := e.srv.fsm.State()
keyMeta, err := store.GetActiveRootKeyMeta(nil)
if err != nil {
return nil, err
}
return e.keysetByID(keyMeta.KeyID)
}
func (e *Encrypter) keysetByID(keyID string) (*keyset, error) {
e.lock.RLock()
defer e.lock.RUnlock()
key, ok := e.keys[keyID]
keyset, ok := e.keyring[keyID]
if !ok {
return []byte{}, fmt.Errorf("no such key %s in keyring", keyID)
return nil, fmt.Errorf("no such key %s in keyring", keyID)
}
return key.Key, nil
return keyset, nil
}
// RemoveKey removes a key by ID from the keyring
@ -175,8 +264,7 @@ func (e *Encrypter) RemoveKey(keyID string) error {
// remove the serialized file?
e.lock.Lock()
defer e.lock.Unlock()
delete(e.ciphers, keyID)
delete(e.keys, keyID)
delete(e.keyring, keyID)
return nil
}
@ -212,6 +300,7 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) {
if err := json.Unmarshal(raw, storedKey); err != nil {
return nil, err
}
meta := &structs.RootKeyMeta{
Active: storedKey.Meta.Active,
KeyID: storedKey.Meta.KeyID,
@ -231,7 +320,6 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) {
Meta: meta,
Key: key,
}, nil
}
type KeyringReplicator struct {

View File

@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
)
@ -20,7 +21,7 @@ func TestEncrypter_LoadSave(t *testing.T) {
ci.Parallel(t)
tmpDir := t.TempDir()
encrypter, err := NewEncrypter(tmpDir)
encrypter, err := NewEncrypter(nil, tmpDir)
require.NoError(t, err)
algos := []structs.EncryptionAlgorithm{
@ -270,5 +271,29 @@ func TestKeyringReplicator(t *testing.T) {
require.Eventually(t, checkReplicationFn(keyID3),
time.Second*5, time.Second,
"expected keys to be replicated to followers after election")
}
func TestEncrypter_SignVerify(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)
alloc := mock.Alloc()
claim := alloc.ToTaskIdentityClaims("web")
e := srv.encrypter
out, err := e.SignClaims(claim)
require.NoError(t, err)
got, err := e.VerifyClaim(out)
require.NoError(t, err)
require.NotNil(t, got)
require.NoError(t, got.Valid())
require.Equal(t, alloc.ID, got.AllocationID)
require.Equal(t, alloc.JobID, got.JobID)
require.Equal(t, "web", got.TaskName)
}

View File

@ -242,6 +242,11 @@ func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap
// to approximate the scheduling time.
updateAllocTimestamps(req.AllocsUpdated, now)
err := p.signAllocIdentities(plan.Job, req.AllocsUpdated)
if err != nil {
return nil, err
}
for _, preemptions := range result.NodePreemptions {
for _, preemptedAlloc := range preemptions {
req.AllocsPreempted = append(req.AllocsPreempted, normalizePreemptedAlloc(preemptedAlloc, now))
@ -367,6 +372,25 @@ func updateAllocTimestamps(allocations []*structs.Allocation, timestamp int64) {
}
}
func (p *planner) signAllocIdentities(job *structs.Job, allocations []*structs.Allocation) error {
encrypter := p.Server.encrypter
for _, alloc := range allocations {
alloc.SignedIdentities = map[string]string{}
tg := job.LookupTaskGroup(alloc.TaskGroup)
for _, task := range tg.Tasks {
claims := alloc.ToTaskIdentityClaims(task.Name)
token, err := encrypter.SignClaims(claims)
if err != nil {
return err
}
alloc.SignedIdentities[task.Name] = token
}
}
return nil
}
// asyncPlanWait is used to apply and respond to a plan async. On successful
// commit the plan's index will be sent on the chan. On error the chan will be
// closed.

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
metrics "github.com/armon/go-metrics"
@ -11,6 +12,7 @@ import (
memdb "github.com/hashicorp/go-memdb"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/state/paginator"
"github.com/hashicorp/nomad/nomad/structs"
@ -128,13 +130,11 @@ func (sv *SecureVariables) Read(args *structs.SecureVariablesReadRequest, reply
}
defer metrics.MeasureSince([]string{"nomad", "secure_variables", "read"}, time.Now())
if aclObj, err := sv.srv.ResolveToken(args.AuthToken); err != nil {
// FIXME: Temporary ACL Test policy. Update once implementation complete
err := sv.handleMixedAuthEndpoint(args.QueryOptions,
acl.NamespaceCapabilitySubmitJob, args.Path)
if err != nil {
return err
} else if aclObj != nil {
// FIXME: Temporary ACL Test policy. Update once implementation complete
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
return structs.ErrPermissionDenied
}
}
// Setup the blocking query
@ -180,13 +180,14 @@ func (sv *SecureVariables) List(
return sv.listAllSecureVariables(args, reply)
}
if aclObj, err := sv.srv.ResolveToken(args.AuthToken); err != nil {
// FIXME: Temporary ACL Test policy. Update once implementation complete
err := sv.handleMixedAuthEndpoint(args.QueryOptions,
acl.NamespaceCapabilitySubmitJob, args.Prefix)
if err != nil {
return err
}
if err != nil {
return err
} else if aclObj != nil {
// FIXME: Temporary ACL Test policy. Update once implementation complete
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
return structs.ErrPermissionDenied
}
}
// Set up and return the blocking query.
@ -367,3 +368,95 @@ func (sv *SecureVariables) decrypt(v *structs.SecureVariable) error {
v.EncryptedData = nil
return nil
}
// handleMixedAuthEndpoint is a helper to handle auth on RPC endpoints that can
// either be called by external clients or by workload identity
func (sv *SecureVariables) handleMixedAuthEndpoint(args structs.QueryOptions, cap, pathOrPrefix string) error {
// Perform the initial token resolution.
aclObj, err := sv.srv.ResolveToken(args.AuthToken)
if err == nil {
// Perform our ACL validation. If the object is nil, this means ACLs
// are not enabled, otherwise trigger the allowed namespace function.
if aclObj != nil {
if !aclObj.AllowNsOp(args.RequestNamespace(), cap) {
return structs.ErrPermissionDenied
}
}
return nil
}
if helper.IsUUID(args.AuthToken) {
// early return for ErrNotFound or other errors if it's formed
// like an ACLToken.SecretID
return err
}
// Attempt to verify the token as a JWT with a workload
// identity claim
claim, err := sv.srv.ResolveClaim(args.AuthToken)
if err != nil {
return structs.ErrPermissionDenied
}
alloc, err := sv.authValidateAllocationIdentity(claim.AllocationID, args.RequestNamespace())
if err != nil {
metrics.IncrCounter([]string{
"nomad", "secure_variables", "invalid_allocation_identity"}, 1)
sv.logger.Trace("allocation identity was not valid", "error", err)
return structs.ErrPermissionDenied
}
err = sv.authValidatePrefix(alloc, claim.TaskName, pathOrPrefix)
if err != nil {
sv.logger.Trace("allocation identity did not have permission for path", "error", err)
return structs.ErrPermissionDenied
}
return nil
}
// authValidateAllocationIdentity asserts that the allocation ID
// belongs to a non-terminal Allocation in the requested namespace
func (sv *SecureVariables) authValidateAllocationIdentity(allocID, ns string) (*structs.Allocation, error) {
store, err := sv.srv.fsm.State().Snapshot()
if err != nil {
return nil, err
}
alloc, err := store.AllocByID(nil, allocID)
if err != nil {
return nil, err
}
if alloc == nil || alloc.Job == nil {
return nil, fmt.Errorf("allocation does not exist")
}
// the claims for terminal allocs are always treated as expired
if alloc.TerminalStatus() {
return nil, fmt.Errorf("allocation is terminal")
}
if alloc.Job.Namespace != ns {
return nil, fmt.Errorf("allocation is in another namespace")
}
return alloc, nil
}
// authValidatePrefix asserts that the requested path is valid for
// this allocation
func (sv *SecureVariables) authValidatePrefix(alloc *structs.Allocation, taskName, pathOrPrefix string) error {
parts := strings.Split(pathOrPrefix, "/")
expect := []string{"jobs", alloc.Job.ID, alloc.TaskGroup, taskName}
if len(parts) > len(expect) {
return structs.ErrPermissionDenied
}
for idx, part := range parts {
if part != expect[idx] {
return structs.ErrPermissionDenied
}
}
return nil
}

View File

@ -0,0 +1,148 @@
package nomad
import (
"fmt"
"math/rand"
"strings"
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)
func TestSecureVariablesEndpoint_auth(t *testing.T) {
ci.Parallel(t)
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)
const ns = "nondefault-namespace"
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusFailed
alloc.Job.Namespace = ns
jobID := alloc.JobID
store := srv.fsm.State()
require.NoError(t, store.UpsertAllocs(
structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc}))
claim := alloc.ToTaskIdentityClaims("web")
e := srv.encrypter
idToken, err := e.SignClaims(claim)
require.NoError(t, err)
// corrupt the signature of the token
idTokenParts := strings.Split(idToken, ".")
require.Len(t, idTokenParts, 3)
sig := []string(strings.Split(idTokenParts[2], ""))
rand.Shuffle(len(sig), func(i, j int) {
sig[i], sig[j] = sig[j], sig[i]
})
idTokenParts[2] = strings.Join(sig, "")
invalidIDToken := strings.Join(idTokenParts, ".")
t.Run("terminal alloc should be denied", func(t *testing.T) {
err = srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint(
structs.QueryOptions{AuthToken: idToken, Namespace: ns}, "n/a",
fmt.Sprintf("jobs/%s/web/web", jobID))
require.EqualError(t, err, structs.ErrPermissionDenied.Error())
})
// make alloc non-terminal
alloc.ClientStatus = structs.AllocClientStatusRunning
require.NoError(t, store.UpsertAllocs(
structs.MsgTypeTestSetup, 1200, []*structs.Allocation{alloc}))
t.Run("wrong namespace should be denied", func(t *testing.T) {
err = srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint(
structs.QueryOptions{AuthToken: idToken, Namespace: structs.DefaultNamespace}, "n/a",
fmt.Sprintf("jobs/%s/web/web", jobID))
require.EqualError(t, err, structs.ErrPermissionDenied.Error())
})
testCases := []struct {
name string
token string
cap string
path string
expectedErr error
}{
{
name: "valid claim for path with task secret",
token: idToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s/web/web", jobID),
expectedErr: nil,
},
{
name: "valid claim for path with group secret",
token: idToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s/web", jobID),
expectedErr: nil,
},
{
name: "valid claim for path with job secret",
token: idToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s", jobID),
expectedErr: nil,
},
{
name: "valid claim for path with namespace secret",
token: idToken,
cap: "n/a",
path: "jobs",
expectedErr: nil,
},
{
name: "extra trailing slash is denied",
token: idToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s/web/", jobID),
expectedErr: structs.ErrPermissionDenied,
},
{
name: "invalid prefix is denied",
token: idToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s/w", jobID),
expectedErr: structs.ErrPermissionDenied,
},
{
name: "missing auth token is denied",
cap: "n/a",
path: fmt.Sprintf("jobs/%s/web/web", jobID),
expectedErr: structs.ErrPermissionDenied,
},
{
name: "invalid signature is denied",
token: invalidIDToken,
cap: "n/a",
path: fmt.Sprintf("jobs/%s/web/web", jobID),
expectedErr: structs.ErrPermissionDenied,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
err := srv.staticEndpoints.SecureVariables.handleMixedAuthEndpoint(
structs.QueryOptions{AuthToken: tc.token, Namespace: ns}, tc.cap, tc.path)
if tc.expectedErr == nil {
require.NoError(t, err)
} else {
require.EqualError(t, err, tc.expectedErr.Error())
}
})
}
}

View File

@ -394,7 +394,7 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntr
}
// Set up the keyring
encrypter, err := NewEncrypter(filepath.Join(s.config.DataDir, "keystore"))
encrypter, err := NewEncrypter(s, filepath.Join(s.config.DataDir, "keystore"))
if err != nil {
return nil, err
}

View File

@ -11,6 +11,7 @@ import (
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/state/paginator"
"github.com/hashicorp/nomad/nomad/structs"
@ -528,27 +529,46 @@ func (s *ServiceRegistration) handleMixedAuthEndpoint(args structs.QueryOptions,
}
}
default:
// Attempt to verify the token as a JWT with a workload
// identity claim if it's not a secret ID.
// COMPAT(1.4.0): we can remove this conditional in 1.5.0
if !helper.IsUUID(args.AuthToken) {
claims, err := s.srv.ResolveClaim(args.AuthToken)
if err != nil {
return err
}
if claims == nil {
return structs.ErrPermissionDenied
}
return nil
}
// COMPAT(1.4.0): Nomad 1.3.0 shipped with authentication by
// node secret but that's been replaced with workload identity
// in 1.4.0. Leave this here for backwards compatibility
// between clients and servers during cluster upgrades, but
// remove for 1.5.0
// In the event we got any error other than ErrTokenNotFound, consider this
// terminal.
if err != structs.ErrTokenNotFound {
return err
}
// Attempt to lookup AuthToken as a Node.SecretID and return any error
// wrapped along with the original.
// Attempt to lookup AuthToken as a Node.SecretID and
// return any error wrapped along with the original.
node, stateErr := s.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
if stateErr != nil {
var mErr multierror.Error
mErr.Errors = append(mErr.Errors, err, stateErr)
return mErr.ErrorOrNil()
}
// At this point, we do not have a valid ACL token, nor are we being
// called, or able to confirm via the state store, by a node.
if node == nil {
return structs.ErrTokenNotFound
}
}
}
return nil
}

View File

@ -858,7 +858,66 @@ func TestServiceRegistration_List(t *testing.T) {
}},
}, serviceRegResp.Services)
},
name: "ACLs enabled with node secret toekn",
name: "ACLs enabled with node secret token",
},
{
serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) {
return TestACLServer(t, nil)
},
testFn: func(t *testing.T, s *Server, token *structs.ACLToken) {
codec := rpcClient(t, s)
testutil.WaitForLeader(t, s.RPC)
// Create a namespace as this is needed when using an ACL like
// we do in this test.
ns := &structs.Namespace{
Name: "platform",
Description: "test namespace",
CreateIndex: 5,
ModifyIndex: 5,
}
ns.SetHash()
require.NoError(t, s.State().UpsertNamespaces(5, []*structs.Namespace{ns}))
// Generate an allocation with a signed identity
allocs := []*structs.Allocation{mock.Alloc()}
job := allocs[0].Job
require.NoError(t, s.State().UpsertJob(structs.MsgTypeTestSetup, 10, job))
s.signAllocIdentities(job, allocs)
require.NoError(t, s.State().UpsertAllocs(structs.MsgTypeTestSetup, 15, allocs))
signedToken := allocs[0].SignedIdentities["web"]
// Generate and upsert some service registrations.
services := mock.ServiceRegistrations()
require.NoError(t, s.State().UpsertServiceRegistrations(
structs.MsgTypeTestSetup, 20, services))
// Test a request while setting the auth token to the signed token
serviceRegReq := &structs.ServiceRegistrationListRequest{
QueryOptions: structs.QueryOptions{
Namespace: "platform",
Region: DefaultRegion,
AuthToken: signedToken,
},
}
var serviceRegResp structs.ServiceRegistrationListResponse
err := msgpackrpc.CallWithCodec(
codec, structs.ServiceRegistrationListRPCMethod,
serviceRegReq, &serviceRegResp)
require.NoError(t, err)
require.ElementsMatch(t, []*structs.ServiceRegistrationListStub{
{
Namespace: "platform",
Services: []*structs.ServiceRegistrationStub{
{
ServiceName: "countdash-api",
Tags: []string{"bar"},
},
}},
}, serviceRegResp.Services)
},
name: "ACLs enabled with valid signed identity",
},
}
@ -1023,6 +1082,7 @@ func TestServiceRegistration_GetService(t *testing.T) {
},
name: "ACLs enabled",
},
{
serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) {
return TestACLServer(t, nil)
@ -1075,6 +1135,50 @@ func TestServiceRegistration_GetService(t *testing.T) {
},
name: "ACLs enabled using node secret",
},
{
serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) {
return TestACLServer(t, nil)
},
testFn: func(t *testing.T, s *Server, token *structs.ACLToken) {
codec := rpcClient(t, s)
testutil.WaitForLeader(t, s.RPC)
// Generate mock services then upsert them individually using different indexes.
services := mock.ServiceRegistrations()
require.NoError(t, s.fsm.State().UpsertServiceRegistrations(
structs.MsgTypeTestSetup, 10, []*structs.ServiceRegistration{services[0]}))
require.NoError(t, s.fsm.State().UpsertServiceRegistrations(
structs.MsgTypeTestSetup, 20, []*structs.ServiceRegistration{services[1]}))
// Generate an allocation with a signed identity
allocs := []*structs.Allocation{mock.Alloc()}
job := allocs[0].Job
require.NoError(t, s.State().UpsertJob(structs.MsgTypeTestSetup, 10, job))
s.signAllocIdentities(job, allocs)
require.NoError(t, s.State().UpsertAllocs(structs.MsgTypeTestSetup, 15, allocs))
signedToken := allocs[0].SignedIdentities["web"]
// Lookup the first registration.
serviceRegReq := &structs.ServiceRegistrationByNameRequest{
ServiceName: services[0].ServiceName,
QueryOptions: structs.QueryOptions{
Namespace: services[0].Namespace,
Region: s.Region(),
AuthToken: signedToken,
},
}
var serviceRegResp structs.ServiceRegistrationByNameResponse
err := msgpackrpc.CallWithCodec(codec, structs.ServiceRegistrationGetServiceRPCMethod, serviceRegReq, &serviceRegResp)
require.NoError(t, err)
require.Equal(t, uint64(10), serviceRegResp.Services[0].CreateIndex)
require.Equal(t, uint64(20), serviceRegResp.Index)
require.Len(t, serviceRegResp.Services, 1)
},
name: "ACLs enabled using valid signed identity",
},
{
serverFn: func(t *testing.T) (*Server, *structs.ACLToken, func()) {
server, cleanup := TestServer(t, nil)

View File

@ -8,4 +8,5 @@ codecgen \
-d 100 \
-t codegen_generated \
-o structs.generated.go \
-nr="^IdentityClaims$" \
${FILES}

View File

@ -24,6 +24,7 @@ import (
"strings"
"time"
jwt "github.com/golang-jwt/jwt/v4"
"github.com/hashicorp/nomad/helper/escapingfs"
"golang.org/x/crypto/blake2b"
@ -9593,6 +9594,11 @@ type Allocation struct {
// to stop running because it got preempted
PreemptedByAllocation string
// SignedIdentities is a map of task names to signed
// identity/capability claim tokens for those tasks. If needed, it
// is populated in the plan applier
SignedIdentities map[string]string `json:"-"`
// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
@ -10287,6 +10293,42 @@ func (a *Allocation) Reconnected() (bool, bool) {
return true, a.Expired(lastReconnect)
}
func (a *Allocation) ToIdentityClaims() *IdentityClaims {
now := jwt.NewNumericDate(time.Now().UTC())
return &IdentityClaims{
Namespace: a.Namespace,
JobID: a.JobID,
AllocationID: a.ID,
RegisteredClaims: jwt.RegisteredClaims{
// TODO: in Nomad 1.5.0 we'll have a refresh loop to
// prevent allocation identities from expiring before the
// allocation is terminal. Once that's implemented, add an
// ExpiresAt here ExpiresAt: &jwt.NumericDate{},
NotBefore: now,
IssuedAt: now,
},
}
}
func (a *Allocation) ToTaskIdentityClaims(taskName string) *IdentityClaims {
claims := a.ToIdentityClaims()
if claims != nil {
claims.TaskName = taskName
}
return claims
}
// IdentityClaims are the input to a JWT identifying a workload. It
// should never be serialized to msgpack unsigned.
type IdentityClaims struct {
Namespace string `json:"nomad_namespace"`
JobID string `json:"nomad_job_id"`
AllocationID string `json:"nomad_allocation_id"`
TaskName string `json:"nomad_task"`
jwt.RegisteredClaims
}
// AllocationDiff is another named type for Allocation (to use the same fields),
// which is used to represent the delta for an Allocation. If you need a method
// defined on the al