546 lines
15 KiB
Go
546 lines
15 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package nomad
|
|
|
|
import (
|
|
"context"
|
|
"crypto/aes"
|
|
"crypto/cipher"
|
|
"crypto/ed25519"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
jwt "github.com/golang-jwt/jwt/v5"
|
|
log "github.com/hashicorp/go-hclog"
|
|
kms "github.com/hashicorp/go-kms-wrapping/v2"
|
|
"github.com/hashicorp/go-kms-wrapping/v2/aead"
|
|
"golang.org/x/time/rate"
|
|
|
|
"github.com/hashicorp/nomad/helper"
|
|
"github.com/hashicorp/nomad/helper/crypto"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// nomadKeystoreExtension is the file extension used for serialized root
// keys in the on-disk keystore.
const nomadKeystoreExtension = ".nks.json"
|
|
|
|
// Encrypter is the keyring for encrypting variables and signing workload
// identities.
type Encrypter struct {
	srv          *Server
	keystorePath string // directory where wrapped root keys are persisted

	// keyring maps root key IDs to their in-memory key material and
	// derived ciphers; guarded by lock.
	keyring map[string]*keyset
	lock    sync.RWMutex
}
|
|
|
|
// keyset bundles a root key with the AEAD cipher used for variable
// encryption and the ed25519 private key used for signing workload
// identity claims, both derived from the root key material.
type keyset struct {
	rootKey    *structs.RootKey
	cipher     cipher.AEAD
	privateKey ed25519.PrivateKey
}
|
|
|
|
// NewEncrypter loads or creates a new local keystore and returns an
|
|
// encryption keyring with the keys it finds.
|
|
func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) {
|
|
|
|
encrypter := &Encrypter{
|
|
srv: srv,
|
|
keystorePath: keystorePath,
|
|
keyring: make(map[string]*keyset),
|
|
}
|
|
|
|
err := encrypter.loadKeystore()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return encrypter, nil
|
|
}
|
|
|
|
func (e *Encrypter) loadKeystore() error {
|
|
|
|
if err := os.MkdirAll(e.keystorePath, 0o700); err != nil {
|
|
return err
|
|
}
|
|
|
|
return filepath.Walk(e.keystorePath, func(path string, info fs.FileInfo, err error) error {
|
|
if err != nil {
|
|
return fmt.Errorf("could not read path %s from keystore: %v", path, err)
|
|
}
|
|
|
|
// skip over subdirectories and non-key files; they shouldn't
|
|
// be here but there's no reason to fail startup for it if the
|
|
// administrator has left something there
|
|
if path != e.keystorePath && info.IsDir() {
|
|
return filepath.SkipDir
|
|
}
|
|
if !strings.HasSuffix(path, nomadKeystoreExtension) {
|
|
return nil
|
|
}
|
|
id := strings.TrimSuffix(filepath.Base(path), nomadKeystoreExtension)
|
|
if !helper.IsUUID(id) {
|
|
return nil
|
|
}
|
|
|
|
key, err := e.loadKeyFromStore(path)
|
|
if err != nil {
|
|
return fmt.Errorf("could not load key file %s from keystore: %v", path, err)
|
|
}
|
|
if key.Meta.KeyID != id {
|
|
return fmt.Errorf("root key ID %s must match key file %s", key.Meta.KeyID, path)
|
|
}
|
|
|
|
err = e.AddKey(key)
|
|
if err != nil {
|
|
return fmt.Errorf("could not add key file %s to keystore: %v", path, err)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Encrypt encrypts the clear data with the cipher for the current
|
|
// root key, and returns the cipher text (including the nonce), and
|
|
// the key ID used to encrypt it
|
|
func (e *Encrypter) Encrypt(cleartext []byte) ([]byte, string, error) {
|
|
|
|
keyset, err := e.activeKeySet()
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
nonce, err := crypto.Bytes(keyset.cipher.NonceSize())
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("failed to generate key wrapper nonce: %v", err)
|
|
}
|
|
|
|
keyID := keyset.rootKey.Meta.KeyID
|
|
additional := []byte(keyID) // include the keyID in the signature inputs
|
|
|
|
// we use the nonce as the dst buffer so that the ciphertext is
|
|
// appended to that buffer and we always keep the nonce and
|
|
// ciphertext together, and so that we're not tempted to reuse
|
|
// the cleartext buffer which the caller still owns
|
|
ciphertext := keyset.cipher.Seal(nonce, nonce, cleartext, additional)
|
|
return ciphertext, keyID, nil
|
|
}
|
|
|
|
// Decrypt takes an encrypted buffer and then root key ID. It extracts
|
|
// the nonce, decrypts the content, and returns the cleartext data.
|
|
func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) {
|
|
e.lock.RLock()
|
|
defer e.lock.RUnlock()
|
|
|
|
keyset, err := e.keysetByIDLocked(keyID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
nonceSize := keyset.cipher.NonceSize()
|
|
nonce := ciphertext[:nonceSize] // nonce was stored alongside ciphertext
|
|
additional := []byte(keyID) // keyID was included in the signature inputs
|
|
|
|
return keyset.cipher.Open(nil, nonce, ciphertext[nonceSize:], additional)
|
|
}
|
|
|
|
// keyIDHeader is the JWT header for the Nomad Key ID used to sign the
// claim. This name matches the common industry practice for this
// header name ("kid", per RFC 7515).
const keyIDHeader = "kid"
|
|
|
|
// SignClaims signs the identity claim for the task and returns an encoded JWT
|
|
// (including both the claim and its signature), the key ID of the key used to
|
|
// sign it, and any error.
|
|
func (e *Encrypter) SignClaims(claim *structs.IdentityClaims) (string, string, error) {
|
|
|
|
// If a key is rotated immediately following a leader election, plans that
|
|
// are in-flight may get signed before the new leader has the key. Allow for
|
|
// a short timeout-and-retry to avoid rejecting plans
|
|
keyset, err := e.activeKeySet()
|
|
if err != nil {
|
|
ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 5*time.Second)
|
|
defer cancel()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return "", "", err
|
|
default:
|
|
time.Sleep(50 * time.Millisecond)
|
|
keyset, err = e.activeKeySet()
|
|
if keyset != nil {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
token := jwt.NewWithClaims(&jwt.SigningMethodEd25519{}, claim)
|
|
token.Header[keyIDHeader] = keyset.rootKey.Meta.KeyID
|
|
|
|
tokenString, err := token.SignedString(keyset.privateKey)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
return tokenString, keyset.rootKey.Meta.KeyID, nil
|
|
}
|
|
|
|
// VerifyClaim accepts a previously-signed encoded claim and validates
|
|
// it before returning the claim
|
|
func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) {
|
|
|
|
token, err := jwt.ParseWithClaims(tokenString, &structs.IdentityClaims{}, func(token *jwt.Token) (interface{}, error) {
|
|
if _, ok := token.Method.(*jwt.SigningMethodEd25519); !ok {
|
|
return nil, fmt.Errorf("unexpected signing method: %v", token.Method.Alg())
|
|
}
|
|
raw := token.Header[keyIDHeader]
|
|
if raw == nil {
|
|
return nil, fmt.Errorf("missing key ID header")
|
|
}
|
|
keyID := raw.(string)
|
|
|
|
e.lock.RLock()
|
|
defer e.lock.RUnlock()
|
|
keyset, err := e.keysetByIDLocked(keyID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return keyset.privateKey.Public(), nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to verify token: %v", err)
|
|
}
|
|
|
|
claims, ok := token.Claims.(*structs.IdentityClaims)
|
|
if !ok || !token.Valid {
|
|
return nil, fmt.Errorf("failed to verify token: invalid token")
|
|
}
|
|
return claims, nil
|
|
}
|
|
|
|
// AddKey stores the key in the keystore and creates a new cipher for it.
|
|
func (e *Encrypter) AddKey(rootKey *structs.RootKey) error {
|
|
|
|
// note: we don't lock the keyring here but inside addCipher
|
|
// instead, so that we're not holding the lock while performing
|
|
// local disk writes
|
|
if err := e.addCipher(rootKey); err != nil {
|
|
return err
|
|
}
|
|
if err := e.saveKeyToStore(rootKey); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addCipher stores the key in the keyring and creates a new cipher for it.
|
|
func (e *Encrypter) addCipher(rootKey *structs.RootKey) error {
|
|
|
|
if rootKey == nil || rootKey.Meta == nil {
|
|
return fmt.Errorf("missing metadata")
|
|
}
|
|
var aead cipher.AEAD
|
|
|
|
switch rootKey.Meta.Algorithm {
|
|
case structs.EncryptionAlgorithmAES256GCM:
|
|
block, err := aes.NewCipher(rootKey.Key)
|
|
if err != nil {
|
|
return fmt.Errorf("could not create cipher: %v", err)
|
|
}
|
|
aead, err = cipher.NewGCM(block)
|
|
if err != nil {
|
|
return fmt.Errorf("could not create cipher: %v", err)
|
|
}
|
|
default:
|
|
return fmt.Errorf("invalid algorithm %s", rootKey.Meta.Algorithm)
|
|
}
|
|
|
|
privateKey := ed25519.NewKeyFromSeed(rootKey.Key)
|
|
|
|
e.lock.Lock()
|
|
defer e.lock.Unlock()
|
|
e.keyring[rootKey.Meta.KeyID] = &keyset{
|
|
rootKey: rootKey,
|
|
cipher: aead,
|
|
privateKey: privateKey,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetKey retrieves the key material by ID from the keyring
|
|
func (e *Encrypter) GetKey(keyID string) ([]byte, error) {
|
|
e.lock.RLock()
|
|
defer e.lock.RUnlock()
|
|
|
|
keyset, err := e.keysetByIDLocked(keyID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return keyset.rootKey.Key, nil
|
|
}
|
|
|
|
// activeKeySet returns the keyset that belongs to the key marked as
// active in the state store (so that it's consistent with raft). It
// acquires the keyring read lock itself, so the caller must NOT
// already hold the lock.
func (e *Encrypter) activeKeySet() (*keyset, error) {
	store := e.srv.fsm.State()
	keyMeta, err := store.GetActiveRootKeyMeta(nil)
	if err != nil {
		return nil, err
	}
	if keyMeta == nil {
		return nil, fmt.Errorf("keyring has not been initialized yet")
	}
	e.lock.RLock()
	defer e.lock.RUnlock()
	return e.keysetByIDLocked(keyMeta.KeyID)
}
|
|
|
|
// keysetByIDLocked returns the keyset for the specified keyID. The
|
|
// caller must read-lock the keyring
|
|
func (e *Encrypter) keysetByIDLocked(keyID string) (*keyset, error) {
|
|
keyset, ok := e.keyring[keyID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("no such key %q in keyring", keyID)
|
|
}
|
|
return keyset, nil
|
|
}
|
|
|
|
// RemoveKey removes a key by ID from the keyring
|
|
func (e *Encrypter) RemoveKey(keyID string) error {
|
|
e.lock.Lock()
|
|
defer e.lock.Unlock()
|
|
delete(e.keyring, keyID)
|
|
return nil
|
|
}
|
|
|
|
// saveKeyToStore serializes a root key to the on-disk keystore.
|
|
func (e *Encrypter) saveKeyToStore(rootKey *structs.RootKey) error {
|
|
|
|
kek, err := crypto.Bytes(32)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to generate key wrapper key: %v", err)
|
|
}
|
|
wrapper, err := e.newKMSWrapper(rootKey.Meta.KeyID, kek)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create encryption wrapper: %v", err)
|
|
}
|
|
blob, err := wrapper.Encrypt(e.srv.shutdownCtx, rootKey.Key)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to encrypt root key: %v", err)
|
|
}
|
|
|
|
kekWrapper := &structs.KeyEncryptionKeyWrapper{
|
|
Meta: rootKey.Meta,
|
|
EncryptedDataEncryptionKey: blob.Ciphertext,
|
|
KeyEncryptionKey: kek,
|
|
}
|
|
|
|
buf, err := json.Marshal(kekWrapper)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
path := filepath.Join(e.keystorePath, rootKey.Meta.KeyID+nomadKeystoreExtension)
|
|
err = os.WriteFile(path, buf, 0o600)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// loadKeyFromStore deserializes a root key from disk.
|
|
func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) {
|
|
|
|
raw, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
kekWrapper := &structs.KeyEncryptionKeyWrapper{}
|
|
if err := json.Unmarshal(raw, kekWrapper); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
meta := kekWrapper.Meta
|
|
if err = meta.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// the errors that bubble up from this library can be a bit opaque, so make
|
|
// sure we wrap them with as much context as possible
|
|
wrapper, err := e.newKMSWrapper(meta.KeyID, kekWrapper.KeyEncryptionKey)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to create key wrapper cipher: %v", err)
|
|
}
|
|
key, err := wrapper.Decrypt(e.srv.shutdownCtx, &kms.BlobInfo{
|
|
Ciphertext: kekWrapper.EncryptedDataEncryptionKey,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to decrypt wrapped root key: %v", err)
|
|
}
|
|
|
|
return &structs.RootKey{
|
|
Meta: meta,
|
|
Key: key,
|
|
}, nil
|
|
}
|
|
|
|
// newKMSWrapper returns a go-kms-wrapping interface the caller can use to
|
|
// encrypt the RootKey with a key encryption key (KEK). This is a bit of
|
|
// security theatre for local on-disk key material, but gives us a shim for
|
|
// external KMS providers in the future.
|
|
func (e *Encrypter) newKMSWrapper(keyID string, kek []byte) (kms.Wrapper, error) {
|
|
wrapper := aead.NewWrapper()
|
|
wrapper.SetConfig(context.Background(),
|
|
aead.WithAeadType(kms.AeadTypeAesGcm),
|
|
aead.WithHashType(kms.HashTypeSha256),
|
|
kms.WithKeyId(keyID),
|
|
)
|
|
err := wrapper.SetAesGcmKeyBytes(kek)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return wrapper, nil
|
|
}
|
|
|
|
// KeyringReplicator fetches root key material missing from the local
// keyring from peer servers in a background loop (see run).
type KeyringReplicator struct {
	srv       *Server
	encrypter *Encrypter
	logger    log.Logger
	stopFn    context.CancelFunc // cancels the run loop's context
}
|
|
|
|
func NewKeyringReplicator(srv *Server, e *Encrypter) *KeyringReplicator {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
repl := &KeyringReplicator{
|
|
srv: srv,
|
|
encrypter: e,
|
|
logger: srv.logger.Named("keyring.replicator"),
|
|
stopFn: cancel,
|
|
}
|
|
go repl.run(ctx)
|
|
return repl
|
|
}
|
|
|
|
// stop cancels the replication loop's context. It is provided for testing.
func (krr *KeyringReplicator) stop() {
	krr.stopFn()
}
|
|
|
|
// keyringReplicationRate is the rate limit (events per second, with an
// equal burst) applied to the replication loop in run.
const keyringReplicationRate = 5
|
|
|
|
// run is the replication loop. On each (rate-limited) pass it lists all
// root key metadata from the state store and replicates any key whose
// material is missing from the local keyring. It exits when either the
// server shuts down or the replicator's own context is canceled.
func (krr *KeyringReplicator) run(ctx context.Context) {
	krr.logger.Debug("starting encryption key replication")
	defer krr.logger.Debug("exiting key replication")

	limiter := rate.NewLimiter(keyringReplicationRate, keyringReplicationRate)

	for {
		select {
		case <-krr.srv.shutdownCtx.Done():
			return
		case <-ctx.Done():
			return
		default:
			// Wait also returns an error when ctx is canceled; the
			// ctx.Done case above catches that on the next iteration
			err := limiter.Wait(ctx)
			if err != nil {
				continue // rate limit exceeded
			}

			store := krr.srv.fsm.State()
			iter, err := store.RootKeyMetas(nil)
			if err != nil {
				krr.logger.Error("failed to fetch keyring", "error", err)
				continue
			}
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}

				keyMeta := raw.(*structs.RootKeyMeta)
				if key, err := krr.encrypter.GetKey(keyMeta.KeyID); err == nil && len(key) > 0 {
					// the key material is immutable so if we've already got it
					// we can move on to the next key
					continue
				}

				err := krr.replicateKey(ctx, keyMeta)
				if err != nil {
					// don't break the loop on an error, as we want to make sure
					// we've replicated any keys we can. the rate limiter will
					// prevent this case from sending excessive RPCs
					krr.logger.Error(err.Error(), "key", keyMeta.KeyID)
				}
			}
		}
	}

}
|
|
|
|
// replicateKey replicates a single key from peer servers that was present in
// the state store but missing from the keyring. Returns an error only if no
// peers have this key.
func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs.RootKeyMeta) error {
	keyID := keyMeta.KeyID
	krr.logger.Debug("replicating new key", "id", keyID)

	getReq := &structs.KeyringGetRootKeyRequest{
		KeyID: keyID,
		QueryOptions: structs.QueryOptions{
			Region: krr.srv.config.Region,
			// presumably a blocking query up to the index that wrote this
			// key metadata, so the leader RPC can't race the raft apply —
			// TODO confirm against QueryOptions semantics
			MinQueryIndex: keyMeta.ModifyIndex - 1,
		},
	}
	getResp := &structs.KeyringGetRootKeyResponse{}
	err := krr.srv.RPC("Keyring.Get", getReq, getResp)

	if err != nil || getResp.Key == nil {
		// Key replication needs to tolerate leadership flapping. If a key is
		// rotated during a leadership transition, it's possible that the new
		// leader has not yet replicated the key from the old leader before the
		// transition. Ask all the other servers if they have it.
		krr.logger.Warn("failed to fetch key from current leader, trying peers",
			"key", keyID, "error", err)
		getReq.AllowStale = true
		for _, peer := range krr.getAllPeers() {
			err = krr.srv.forwardServer(peer, "Keyring.Get", getReq, getResp)
			if err == nil && getResp.Key != nil {
				break
			}
		}
		// only fail once every peer has been tried
		if getResp.Key == nil {
			krr.logger.Error("failed to fetch key from any peer",
				"key", keyID, "error", err)
			return fmt.Errorf("failed to fetch key from any peer: %v", err)
		}
	}

	// AddKey both installs the key in the keyring and persists it to the
	// local on-disk keystore
	err = krr.encrypter.AddKey(getResp.Key)
	if err != nil {
		return fmt.Errorf("failed to add key to keyring: %v", err)
	}

	krr.logger.Debug("added key", "key", keyID)
	return nil
}
|
|
|
|
// TODO: move this method into Server?
|
|
func (krr *KeyringReplicator) getAllPeers() []*serverParts {
|
|
krr.srv.peerLock.RLock()
|
|
defer krr.srv.peerLock.RUnlock()
|
|
peers := make([]*serverParts, 0, len(krr.srv.localPeers))
|
|
for _, peer := range krr.srv.localPeers {
|
|
peers = append(peers, peer.Copy())
|
|
}
|
|
return peers
|
|
}
|