package nomad import ( "context" "crypto/aes" "crypto/cipher" "crypto/ed25519" "encoding/json" "fmt" "io/fs" "os" "path/filepath" "strings" "sync" "time" jwt "github.com/golang-jwt/jwt/v4" log "github.com/hashicorp/go-hclog" kms "github.com/hashicorp/go-kms-wrapping/v2" "github.com/hashicorp/go-kms-wrapping/v2/aead" "golang.org/x/time/rate" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/crypto" "github.com/hashicorp/nomad/nomad/structs" ) const nomadKeystoreExtension = ".nks.json" // Encrypter is the keyring for encrypting variables and signing workload // identities. type Encrypter struct { srv *Server keystorePath string keyring map[string]*keyset lock sync.RWMutex } type keyset struct { rootKey *structs.RootKey cipher cipher.AEAD privateKey ed25519.PrivateKey } // NewEncrypter loads or creates a new local keystore and returns an // encryption keyring with the keys it finds. func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) { encrypter := &Encrypter{ srv: srv, keystorePath: keystorePath, keyring: make(map[string]*keyset), } err := encrypter.loadKeystore() if err != nil { return nil, err } return encrypter, nil } func (e *Encrypter) loadKeystore() error { if err := os.MkdirAll(e.keystorePath, 0o700); err != nil { return err } return filepath.Walk(e.keystorePath, func(path string, info fs.FileInfo, err error) error { if err != nil { return fmt.Errorf("could not read path %s from keystore: %v", path, err) } // skip over subdirectories and non-key files; they shouldn't // be here but there's no reason to fail startup for it if the // administrator has left something there if path != e.keystorePath && info.IsDir() { return filepath.SkipDir } if !strings.HasSuffix(path, nomadKeystoreExtension) { return nil } id := strings.TrimSuffix(filepath.Base(path), nomadKeystoreExtension) if !helper.IsUUID(id) { return nil } key, err := e.loadKeyFromStore(path) if err != nil { return fmt.Errorf("could not load key file %s from keystore: %v", path, err) } if key.Meta.KeyID != id { return fmt.Errorf("root key ID %s must match key file %s", key.Meta.KeyID, path) } err = e.AddKey(key) if err != nil { return fmt.Errorf("could not add key file %s to keystore: %v", path, err) } return nil }) } // Encrypt encrypts the clear data with the cipher for the current // root key, and returns the cipher text (including the nonce), and // the key ID used to encrypt it func (e *Encrypter) Encrypt(cleartext []byte) ([]byte, string, error) { e.lock.RLock() defer e.lock.RUnlock() keyset, err := e.activeKeySetLocked() if err != nil { return nil, "", err } nonce, err := crypto.Bytes(keyset.cipher.NonceSize()) if err != nil { return nil, "", fmt.Errorf("failed to generate key wrapper nonce: %v", err) } keyID := keyset.rootKey.Meta.KeyID additional := []byte(keyID) // include the keyID in the signature inputs // we use the nonce as the dst buffer so that the ciphertext is // appended to that buffer and we always keep the nonce and // ciphertext together, and so that we're not tempted to reuse // the cleartext buffer which the caller still owns ciphertext := keyset.cipher.Seal(nonce, nonce, cleartext, additional) return ciphertext, keyID, nil } // Decrypt takes an encrypted buffer and then root key ID. It extracts // the nonce, decrypts the content, and returns the cleartext data. func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) { e.lock.RLock() defer e.lock.RUnlock() keyset, err := e.keysetByIDLocked(keyID) if err != nil { return nil, err } nonceSize := keyset.cipher.NonceSize() nonce := ciphertext[:nonceSize] // nonce was stored alongside ciphertext additional := []byte(keyID) // keyID was included in the signature inputs return keyset.cipher.Open(nil, nonce, ciphertext[nonceSize:], additional) } // keyIDHeader is the JWT header for the Nomad Key ID used to sign the // claim. This name matches the common industry practice for this // header name. const keyIDHeader = "kid" // SignClaims signs the identity claim for the task and returns an // encoded JWT with both the claim and its signature func (e *Encrypter) SignClaims(claim *structs.IdentityClaims) (string, error) { e.lock.RLock() defer e.lock.RUnlock() keyset, err := e.activeKeySetLocked() if err != nil { return "", err } token := jwt.NewWithClaims(&jwt.SigningMethodEd25519{}, claim) token.Header[keyIDHeader] = keyset.rootKey.Meta.KeyID tokenString, err := token.SignedString(keyset.privateKey) if err != nil { return "", err } return tokenString, nil } // VerifyClaim accepts a previously-signed encoded claim and validates // it before returning the claim func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) { e.lock.RLock() defer e.lock.RUnlock() token, err := jwt.ParseWithClaims(tokenString, &structs.IdentityClaims{}, func(token *jwt.Token) (interface{}, error) { if _, ok := token.Method.(*jwt.SigningMethodEd25519); !ok { return nil, fmt.Errorf("unexpected signing method: %v", token.Method.Alg()) } raw := token.Header[keyIDHeader] if raw == nil { return nil, fmt.Errorf("missing key ID header") } keyID := raw.(string) keyset, err := e.keysetByIDLocked(keyID) if err != nil { return nil, err } return keyset.privateKey.Public(), nil }) if err != nil { return nil, fmt.Errorf("failed to verify token: %v", err) } claims, ok := token.Claims.(*structs.IdentityClaims) if !ok || !token.Valid { return nil, fmt.Errorf("failed to verify token: invalid token") } return claims, nil } // AddKey stores the key in the keystore and creates a new cipher for it. func (e *Encrypter) AddKey(rootKey *structs.RootKey) error { // note: we don't lock the keyring here but inside addCipher // instead, so that we're not holding the lock while performing // local disk writes if err := e.addCipher(rootKey); err != nil { return err } if err := e.saveKeyToStore(rootKey); err != nil { return err } return nil } // addCipher stores the key in the keyring and creates a new cipher for it. func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { if rootKey == nil || rootKey.Meta == nil { return fmt.Errorf("missing metadata") } var aead cipher.AEAD switch rootKey.Meta.Algorithm { case structs.EncryptionAlgorithmAES256GCM: block, err := aes.NewCipher(rootKey.Key) if err != nil { return fmt.Errorf("could not create cipher: %v", err) } aead, err = cipher.NewGCM(block) if err != nil { return fmt.Errorf("could not create cipher: %v", err) } default: return fmt.Errorf("invalid algorithm %s", rootKey.Meta.Algorithm) } privateKey := ed25519.NewKeyFromSeed(rootKey.Key) e.lock.Lock() defer e.lock.Unlock() e.keyring[rootKey.Meta.KeyID] = &keyset{ rootKey: rootKey, cipher: aead, privateKey: privateKey, } return nil } // GetKey retrieves the key material by ID from the keyring func (e *Encrypter) GetKey(keyID string) ([]byte, error) { e.lock.RLock() defer e.lock.RUnlock() keyset, err := e.keysetByIDLocked(keyID) if err != nil { return nil, err } return keyset.rootKey.Key, nil } // activeKeySetLocked returns the keyset that belongs to the key marked as // active in the state store (so that it's consistent with raft). The // called must read-lock the keyring func (e *Encrypter) activeKeySetLocked() (*keyset, error) { store := e.srv.fsm.State() keyMeta, err := store.GetActiveRootKeyMeta(nil) if err != nil { return nil, err } return e.keysetByIDLocked(keyMeta.KeyID) } // keysetByIDLocked returns the keyset for the specified keyID. The // caller must read-lock the keyring func (e *Encrypter) keysetByIDLocked(keyID string) (*keyset, error) { keyset, ok := e.keyring[keyID] if !ok { return nil, fmt.Errorf("no such key %q in keyring", keyID) } return keyset, nil } // RemoveKey removes a key by ID from the keyring func (e *Encrypter) RemoveKey(keyID string) error { e.lock.Lock() defer e.lock.Unlock() delete(e.keyring, keyID) return nil } // saveKeyToStore serializes a root key to the on-disk keystore. func (e *Encrypter) saveKeyToStore(rootKey *structs.RootKey) error { kek, err := crypto.Bytes(32) if err != nil { return fmt.Errorf("failed to generate key wrapper key: %v", err) } wrapper, err := e.newKMSWrapper(rootKey.Meta.KeyID, kek) if err != nil { return fmt.Errorf("failed to create encryption wrapper: %v", err) } blob, err := wrapper.Encrypt(e.srv.shutdownCtx, rootKey.Key) if err != nil { return fmt.Errorf("failed to encrypt root key: %v", err) } kekWrapper := &structs.KeyEncryptionKeyWrapper{ Meta: rootKey.Meta, EncryptedDataEncryptionKey: blob.Ciphertext, KeyEncryptionKey: kek, } buf, err := json.Marshal(kekWrapper) if err != nil { return err } path := filepath.Join(e.keystorePath, rootKey.Meta.KeyID+nomadKeystoreExtension) err = os.WriteFile(path, buf, 0o600) if err != nil { return err } return nil } // loadKeyFromStore deserializes a root key from disk. func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { raw, err := os.ReadFile(path) if err != nil { return nil, err } kekWrapper := &structs.KeyEncryptionKeyWrapper{} if err := json.Unmarshal(raw, kekWrapper); err != nil { return nil, err } meta := kekWrapper.Meta if err = meta.Validate(); err != nil { return nil, err } // the errors that bubble up from this library can be a bit opaque, so make // sure we wrap them with as much context as possible wrapper, err := e.newKMSWrapper(meta.KeyID, kekWrapper.KeyEncryptionKey) if err != nil { return nil, fmt.Errorf("unable to create key wrapper cipher: %v", err) } key, err := wrapper.Decrypt(e.srv.shutdownCtx, &kms.BlobInfo{ Ciphertext: kekWrapper.EncryptedDataEncryptionKey, }) if err != nil { return nil, fmt.Errorf("unable to decrypt wrapped root key: %v", err) } return &structs.RootKey{ Meta: meta, Key: key, }, nil } // newKMSWrapper returns a go-kms-wrapping interface the caller can use to // encrypt the RootKey with a key encryption key (KEK). This is a bit of // security theatre for local on-disk key material, but gives us a shim for // external KMS providers in the future. func (e *Encrypter) newKMSWrapper(keyID string, kek []byte) (kms.Wrapper, error) { wrapper := aead.NewWrapper() wrapper.SetConfig(context.Background(), aead.WithAeadType(kms.AeadTypeAesGcm), aead.WithHashType(kms.HashTypeSha256), kms.WithKeyId(keyID), ) err := wrapper.SetAesGcmKeyBytes(kek) if err != nil { return nil, err } return wrapper, nil } type KeyringReplicator struct { srv *Server encrypter *Encrypter logger log.Logger stopFn context.CancelFunc } func NewKeyringReplicator(srv *Server, e *Encrypter) *KeyringReplicator { ctx, cancel := context.WithCancel(context.Background()) repl := &KeyringReplicator{ srv: srv, encrypter: e, logger: srv.logger.Named("keyring.replicator"), stopFn: cancel, } go repl.run(ctx) return repl } // stop is provided for testing func (krr *KeyringReplicator) stop() { krr.stopFn() } func (krr *KeyringReplicator) run(ctx context.Context) { limiter := rate.NewLimiter(replicationRateLimit, int(replicationRateLimit)) krr.logger.Debug("starting encryption key replication") defer krr.logger.Debug("exiting key replication") retryErrTimer, stop := helper.NewSafeTimer(time.Second * 1) defer stop() START: store := krr.srv.fsm.State() for { select { case <-krr.srv.shutdownCtx.Done(): return case <-ctx.Done(): return default: // Rate limit how often we attempt replication limiter.Wait(ctx) ws := store.NewWatchSet() iter, err := store.RootKeyMetas(ws) if err != nil { krr.logger.Error("failed to fetch keyring", "error", err) goto ERR_WAIT } for { raw := iter.Next() if raw == nil { break } keyMeta := raw.(*structs.RootKeyMeta) keyID := keyMeta.KeyID if _, err := krr.encrypter.GetKey(keyID); err == nil { // the key material is immutable so if we've already got it // we can safely return early continue } krr.logger.Trace("replicating new key", "id", keyID) getReq := &structs.KeyringGetRootKeyRequest{ KeyID: keyID, QueryOptions: structs.QueryOptions{ Region: krr.srv.config.Region, }, } getResp := &structs.KeyringGetRootKeyResponse{} err := krr.srv.RPC("Keyring.Get", getReq, getResp) if err != nil || getResp.Key == nil { // Key replication needs to tolerate leadership // flapping. If a key is rotated during a // leadership transition, it's possible that the // new leader has not yet replicated the key from // the old leader before the transition. Ask all // the other servers if they have it. krr.logger.Debug("failed to fetch key from current leader", "key", keyID, "error", err) getReq.AllowStale = true for _, peer := range krr.getAllPeers() { err = krr.srv.forwardServer(peer, "Keyring.Get", getReq, getResp) if err == nil { break } } if getResp.Key == nil { krr.logger.Error("failed to fetch key from any peer", "key", keyID, "error", err) goto ERR_WAIT } } err = krr.encrypter.AddKey(getResp.Key) if err != nil { krr.logger.Error("failed to add key", "key", keyID, "error", err) goto ERR_WAIT } krr.logger.Trace("added key", "key", keyID) } } } ERR_WAIT: retryErrTimer.Reset(1 * time.Second) select { case <-retryErrTimer.C: goto START case <-ctx.Done(): return } } // TODO: move this method into Server? func (krr *KeyringReplicator) getAllPeers() []*serverParts { krr.srv.peerLock.RLock() defer krr.srv.peerLock.RUnlock() peers := make([]*serverParts, 0, len(krr.srv.localPeers)) for _, peer := range krr.srv.localPeers { peers = append(peers, peer.Copy()) } return peers }