open-nomad/nomad/encrypter.go
Tim Gross 160ad9b64f implement Encrypt/Decrypt methods of encrypter (#13375)
The `Encrypt` method generates an appropriately-sized nonce and uses
that buffer as the prefix for the ciphertext. This keeps the
ciphertext and nonce together for decryption, and reuses the buffer as
much as possible without presenting the temptation to reuse the
cleartext buffer owned by the caller.

We include the key ID as the "additional data" field that's used as an
extra input to the authentication tag, to provide additional
assurance that a ciphertext originated with that key.

Refactors the locking for the keyring so that the public methods are
generally (with one commented exception) responsible for taking the
lock, while the inner methods assume the lock is already held.
2022-07-11 13:34:05 -04:00
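
A minimal, self-contained sketch of the scheme the message describes
(illustrative only; sealWithKeyID is a hypothetical helper, not a
function in this file, and a 32-byte AES-256 key is assumed):

    // assumes imports of crypto/aes, crypto/cipher, and crypto/rand
    func sealWithKeyID(key, cleartext []byte, keyID string) ([]byte, error) {
        block, err := aes.NewCipher(key) // a 32-byte key selects AES-256
        if err != nil {
            return nil, err
        }
        aead, err := cipher.NewGCM(block)
        if err != nil {
            return nil, err
        }
        nonce := make([]byte, aead.NonceSize())
        if _, err := rand.Read(nonce); err != nil {
            return nil, err
        }
        // Seal appends to its first argument, so passing the nonce as
        // dst yields nonce||ciphertext in a single buffer
        return aead.Seal(nonce, nonce, cleartext, []byte(keyID)), nil
    }

    // decryption splits the buffer at the nonce boundary and must pass
    // the same additional data (the key ID) for Open to authenticate:
    //   aead.Open(nil, buf[:ns], buf[ns:], []byte(keyID))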


package nomad
import (
"bytes"
"context"
"crypto/aes"
"crypto/cipher"
"crypto/ed25519"
"encoding/base64"
"encoding/json"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"time"
// note: this is aliased so that it's more noticeable if someone
// accidentally swaps it out for math/rand via running goimports
cryptorand "crypto/rand"
jwt "github.com/golang-jwt/jwt/v4"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-msgpack/codec"
"golang.org/x/time/rate"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
const nomadKeystoreExtension = ".nks.json"
// Encrypter is the keyring for secure variables.
type Encrypter struct {
srv *Server
keystorePath string
keyring map[string]*keyset
lock sync.RWMutex
}
type keyset struct {
rootKey *structs.RootKey
cipher cipher.AEAD
privateKey ed25519.PrivateKey
}
// NewEncrypter loads or creates a new local keystore and returns an
// encryption keyring with the keys it finds.
func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) {
err := os.MkdirAll(keystorePath, 0700)
if err != nil {
return nil, err
}
encrypter, err := encrypterFromKeystore(keystorePath)
if err != nil {
return nil, err
}
encrypter.srv = srv
return encrypter, nil
}
func encrypterFromKeystore(keystoreDirectory string) (*Encrypter, error) {
encrypter := &Encrypter{
keyring: make(map[string]*keyset),
keystorePath: keystoreDirectory,
}
err := filepath.Walk(keystoreDirectory, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("could not read path %s from keystore: %v", path, err)
}
// skip over subdirectories and non-key files; they shouldn't
// be here but there's no reason to fail startup for it if the
// administrator has left something there
if path != keystoreDirectory && info.IsDir() {
return filepath.SkipDir
}
if !strings.HasSuffix(path, nomadKeystoreExtension) {
return nil
}
id := strings.TrimSuffix(filepath.Base(path), nomadKeystoreExtension)
if !helper.IsUUID(id) {
return nil
}
key, err := encrypter.loadKeyFromStore(path)
if err != nil {
return fmt.Errorf("could not load key file %s from keystore: %v", path, err)
}
if key.Meta.KeyID != id {
return fmt.Errorf("root key ID %s must match key file %s", key.Meta.KeyID, path)
}
err = encrypter.AddKey(key)
if err != nil {
return fmt.Errorf("could not add key file %s to keystore: %v", path, err)
}
return nil
})
if err != nil {
return nil, err
}
return encrypter, nil
}
// Encrypt encrypts the clear data with the cipher for the current
// root key, and returns the ciphertext (including the nonce) and
// the key ID used to encrypt it
func (e *Encrypter) Encrypt(cleartext []byte) ([]byte, string, error) {
e.lock.RLock()
defer e.lock.RUnlock()
keyset, err := e.activeKeySetLocked()
if err != nil {
return nil, "", err
}
nonceSize := keyset.cipher.NonceSize()
nonce := make([]byte, nonceSize)
n, err := cryptorand.Read(nonce)
if err != nil {
return nil, "", err
}
if n < nonceSize {
return nil, "", fmt.Errorf("failed to encrypt: entropy exhausted")
}
keyID := keyset.rootKey.Meta.KeyID
additional := []byte(keyID) // include the keyID in the signature inputs
// we use the nonce as the dst buffer so that the ciphertext is
// appended to that buffer and we always keep the nonce and
// ciphertext together, and so that we're not tempted to reuse
// the cleartext buffer which the caller still owns
ciphertext := keyset.cipher.Seal(nonce, nonce, cleartext, additional)
return ciphertext, keyID, nil
}
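// Illustrative usage (a sketch, not part of the original file),
// assuming an Encrypter e whose state store has an active root key:
//
//	ciphertext, keyID, err := e.Encrypt([]byte("my-secret"))
//	if err != nil {
//		return err
//	}
//	cleartext, err := e.Decrypt(ciphertext, keyID) // round-trips
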
// Decrypt takes an encrypted buffer and the root key ID. It extracts
// the nonce, decrypts the content, and returns the cleartext data.
func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) {
e.lock.RLock()
defer e.lock.RUnlock()
keyset, err := e.keysetByIDLocked(keyID)
if err != nil {
return nil, err
}
nonceSize := keyset.cipher.NonceSize()
if len(ciphertext) < nonceSize {
return nil, fmt.Errorf("invalid ciphertext: shorter than nonce")
}
nonce := ciphertext[:nonceSize] // nonce was stored alongside ciphertext
additional := []byte(keyID) // keyID was included in the signature inputs
return keyset.cipher.Open(nil, nonce, ciphertext[nonceSize:], additional)
}
// keyIDHeader is the JWT header for the Nomad Key ID used to sign the
// claim. This name matches the common industry practice for this
// header name.
const keyIDHeader = "kid"
// SignClaims signs the identity claim for the task and returns an
// encoded JWT with both the claim and its signature
func (e *Encrypter) SignClaims(claim *structs.IdentityClaims) (string, error) {
e.lock.RLock()
defer e.lock.RUnlock()
keyset, err := e.activeKeySetLocked()
if err != nil {
return "", err
}
token := jwt.NewWithClaims(&jwt.SigningMethodEd25519{}, claim)
token.Header[keyIDHeader] = keyset.rootKey.Meta.KeyID
tokenString, err := token.SignedString(keyset.privateKey)
if err != nil {
return "", err
}
return tokenString, nil
}
// VerifyClaim accepts a previously-signed encoded claim and validates
// it before returning the claim
func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) {
e.lock.RLock()
defer e.lock.RUnlock()
token, err := jwt.ParseWithClaims(tokenString, &structs.IdentityClaims{}, func(token *jwt.Token) (interface{}, error) {
if _, ok := token.Method.(*jwt.SigningMethodEd25519); !ok {
return nil, fmt.Errorf("unexpected signing method: %v", token.Method.Alg())
}
raw := token.Header[keyIDHeader]
if raw == nil {
return nil, fmt.Errorf("missing key ID header")
}
keyID, ok := raw.(string)
if !ok {
return nil, fmt.Errorf("invalid key ID header")
}
keyset, err := e.keysetByIDLocked(keyID)
if err != nil {
return nil, err
}
return keyset.privateKey.Public(), nil
})
if err != nil {
return nil, fmt.Errorf("failed to verify token: %v", err)
}
claims, ok := token.Claims.(*structs.IdentityClaims)
if !ok || !token.Valid {
return nil, fmt.Errorf("failed to verify token: invalid token")
}
return claims, nil
}
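// Illustrative usage (a sketch, not part of the original file):
// signing a workload identity claim and verifying the resulting JWT:
//
//	token, err := e.SignClaims(claim) // claim is a *structs.IdentityClaims
//	if err != nil {
//		return err
//	}
//	verified, err := e.VerifyClaim(token)
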
// AddKey stores the key in the keystore and creates a new cipher for it.
func (e *Encrypter) AddKey(rootKey *structs.RootKey) error {
// note: we don't lock the keyring here but inside addCipher
// instead, so that we're not holding the lock while performing
// local disk writes
if err := e.addCipher(rootKey); err != nil {
return err
}
if err := e.saveKeyToStore(rootKey); err != nil {
return err
}
return nil
}
// addCipher stores the key in the keyring and creates a new cipher for it.
func (e *Encrypter) addCipher(rootKey *structs.RootKey) error {
if rootKey == nil || rootKey.Meta == nil {
return fmt.Errorf("missing metadata")
}
var aead cipher.AEAD
switch rootKey.Meta.Algorithm {
case structs.EncryptionAlgorithmAES256GCM:
block, err := aes.NewCipher(rootKey.Key)
if err != nil {
return fmt.Errorf("could not create cipher: %v", err)
}
aead, err = cipher.NewGCM(block)
if err != nil {
return fmt.Errorf("could not create cipher: %v", err)
}
default:
return fmt.Errorf("invalid algorithm %s", rootKey.Meta.Algorithm)
}
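// note: ed25519.NewKeyFromSeed panics unless the seed is exactly
// ed25519.SeedSize (32) bytes; an AES-256 root key is 32 bytes, so
// the same key material can double as the signing seed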
privateKey := ed25519.NewKeyFromSeed(rootKey.Key)
e.lock.Lock()
defer e.lock.Unlock()
e.keyring[rootKey.Meta.KeyID] = &keyset{
rootKey: rootKey,
cipher: aead,
privateKey: privateKey,
}
return nil
}
// GetKey retrieves the key material by ID from the keyring
func (e *Encrypter) GetKey(keyID string) ([]byte, error) {
e.lock.RLock()
defer e.lock.RUnlock()
keyset, err := e.keysetByIDLocked(keyID)
if err != nil {
return nil, err
}
return keyset.rootKey.Key, nil
}
// activeKeySetLocked returns the keyset that belongs to the key marked as
// active in the state store (so that it's consistent with raft). The
caller must read-lock the keyring
func (e *Encrypter) activeKeySetLocked() (*keyset, error) {
store := e.srv.fsm.State()
keyMeta, err := store.GetActiveRootKeyMeta(nil)
if err != nil {
return nil, err
}
return e.keysetByIDLocked(keyMeta.KeyID)
}
// keysetByIDLocked returns the keyset for the specified keyID. The
// caller must read-lock the keyring
func (e *Encrypter) keysetByIDLocked(keyID string) (*keyset, error) {
keyset, ok := e.keyring[keyID]
if !ok {
return nil, fmt.Errorf("no such key %s in keyring", keyID)
}
return keyset, nil
}
// RemoveKey removes a key by ID from the keyring
func (e *Encrypter) RemoveKey(keyID string) error {
// TODO: should the server remove the serialized file here?
// TODO: given that it's irreversible, should the server *ever*
// remove the serialized file?
e.lock.Lock()
defer e.lock.Unlock()
delete(e.keyring, keyID)
return nil
}
// saveKeyToStore serializes a root key to the on-disk keystore.
func (e *Encrypter) saveKeyToStore(rootKey *structs.RootKey) error {
var buf bytes.Buffer
enc := codec.NewEncoder(&buf, structs.JsonHandleWithExtensions)
err := enc.Encode(rootKey)
if err != nil {
return err
}
path := filepath.Join(e.keystorePath, rootKey.Meta.KeyID+nomadKeystoreExtension)
err = os.WriteFile(path, buf.Bytes(), 0600)
if err != nil {
return err
}
return nil
}
// loadKeyFromStore deserializes a root key from disk.
func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) {
raw, err := os.ReadFile(path)
if err != nil {
return nil, err
}
storedKey := &struct {
Meta *structs.RootKeyMetaStub
Key string
}{}
if err := json.Unmarshal(raw, storedKey); err != nil {
return nil, err
}
meta := &structs.RootKeyMeta{
Active: storedKey.Meta.Active,
KeyID: storedKey.Meta.KeyID,
Algorithm: storedKey.Meta.Algorithm,
CreateTime: storedKey.Meta.CreateTime,
}
if err = meta.Validate(); err != nil {
return nil, err
}
key, err := base64.StdEncoding.DecodeString(storedKey.Key)
if err != nil {
return nil, fmt.Errorf("could not decode key: %v", err)
}
return &structs.RootKey{
Meta: meta,
Key: key,
}, nil
}
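// For illustration (a sketch inferred from saveKeyToStore and
// loadKeyFromStore above; the field values are hypothetical), a
// keystore file named <key-id>.nks.json looks roughly like:
//
//	{
//	  "Meta": {
//	    "Active": true,
//	    "KeyID": "<uuid>",
//	    "Algorithm": "aes256-gcm",
//	    "CreateTime": ...
//	  },
//	  "Key": "<base64-encoded key material>"
//	}
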
type KeyringReplicator struct {
srv *Server
encrypter *Encrypter
logger log.Logger
stopFn context.CancelFunc
}
func NewKeyringReplicator(srv *Server, e *Encrypter) *KeyringReplicator {
ctx, cancel := context.WithCancel(context.Background())
repl := &KeyringReplicator{
srv: srv,
encrypter: e,
logger: srv.logger.Named("keyring.replicator"),
stopFn: cancel,
}
go repl.run(ctx)
return repl
}
// stop is provided for testing
func (krr *KeyringReplicator) stop() {
krr.stopFn()
}
func (krr *KeyringReplicator) run(ctx context.Context) {
limiter := rate.NewLimiter(replicationRateLimit, int(replicationRateLimit))
krr.logger.Debug("starting encryption key replication")
defer krr.logger.Debug("exiting key replication")
retryErrTimer, stop := helper.NewSafeTimer(time.Second * 1)
defer stop()
START:
store := krr.srv.fsm.State()
for {
select {
case <-krr.srv.shutdownCtx.Done():
return
case <-ctx.Done():
return
default:
// Rate limit how often we attempt replication
limiter.Wait(ctx)
ws := store.NewWatchSet()
iter, err := store.RootKeyMetas(ws)
if err != nil {
krr.logger.Error("failed to fetch keyring", "error", err)
goto ERR_WAIT
}
for {
raw := iter.Next()
if raw == nil {
break
}
keyMeta := raw.(*structs.RootKeyMeta)
keyID := keyMeta.KeyID
if _, err := krr.encrypter.GetKey(keyID); err == nil {
// the key material is immutable so if we've already got it
// we can safely skip to the next key
continue
}
krr.logger.Trace("replicating new key", "id", keyID)
getReq := &structs.KeyringGetRootKeyRequest{
KeyID: keyID,
QueryOptions: structs.QueryOptions{
Region: krr.srv.config.Region,
},
}
getResp := &structs.KeyringGetRootKeyResponse{}
err := krr.srv.RPC("Keyring.Get", getReq, getResp)
if err != nil || getResp.Key == nil {
// Key replication needs to tolerate leadership
// flapping. If a key is rotated during a
// leadership transition, it's possible that the
// new leader has not yet replicated the key from
// the old leader before the transition. Ask all
// the other servers if they have it.
krr.logger.Debug("failed to fetch key from current leader",
"key", keyID, "error", err)
getReq.AllowStale = true
for _, peer := range krr.getAllPeers() {
err = krr.srv.forwardServer(peer, "Keyring.Get", getReq, getResp)
if err == nil {
break
}
}
if getResp.Key == nil {
krr.logger.Error("failed to fetch key from any peer",
"key", keyID, "error", err)
goto ERR_WAIT
}
}
err = krr.encrypter.AddKey(getResp.Key)
if err != nil {
krr.logger.Error("failed to add key", "key", keyID, "error", err)
goto ERR_WAIT
}
krr.logger.Trace("added key", "key", keyID)
}
}
}
ERR_WAIT:
// TODO: what's the right amount of backoff here? should this be
// part of our configuration?
retryErrTimer.Reset(1 * time.Second)
select {
case <-retryErrTimer.C:
goto START
case <-ctx.Done():
return
}
}
// TODO: move this method into Server?
func (krr *KeyringReplicator) getAllPeers() []*serverParts {
krr.srv.peerLock.RLock()
defer krr.srv.peerLock.RUnlock()
peers := make([]*serverParts, 0, len(krr.srv.localPeers))
for _, peer := range krr.srv.localPeers {
peers = append(peers, peer.Copy())
}
return peers
}