open-vault/vault/seal_autoseal.go

623 lines
17 KiB
Go
Raw Normal View History

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package vault
import (
"bytes"
"context"
"crypto/subtle"
"encoding/json"
"fmt"
mathrand "math/rand"
"sync"
"sync/atomic"
"time"
aeadwrapper "github.com/hashicorp/go-kms-wrapping/wrappers/aead/v2"
proto "github.com/golang/protobuf/proto"
log "github.com/hashicorp/go-hclog"
wrapping "github.com/hashicorp/go-kms-wrapping/v2"
"github.com/hashicorp/vault/sdk/physical"
"github.com/hashicorp/vault/vault/seal"
)
// barrierTypeUpgradeCheck checks for backwards compat on barrier type, not
// applicable in the OSS side
var (
barrierTypeUpgradeCheck = func(_ wrapping.WrapperType, _ *SealConfig) {}
autoSealUnavailableDuration = []string{"seal", "unreachable", "time"}
// vars for unit testings
sealHealthTestIntervalNominal = 10 * time.Minute
sealHealthTestIntervalUnhealthy = 1 * time.Minute
sealHealthTestTimeout = 1 * time.Minute
)
// autoSeal is a Seal implementation that contains logic for encrypting and
// decrypting stored keys via an underlying AutoSealAccess implementation, as
// well as logic related to recovery keys and barrier config.
type autoSeal struct {
seal.Access
barrierType wrapping.WrapperType
barrierConfig atomic.Value
recoveryConfig atomic.Value
core *Core
logger log.Logger
hcLock sync.Mutex
healthCheckStop chan struct{}
}
// Ensure we are implementing the Seal interface
var _ Seal = (*autoSeal)(nil)
func NewAutoSeal(lowLevel seal.Access) (*autoSeal, error) {
ret := &autoSeal{
Access: lowLevel,
}
ret.barrierConfig.Store((*SealConfig)(nil))
ret.recoveryConfig.Store((*SealConfig)(nil))
// Having the wrapper type in a field is just a convenience since Seal.BarrierType()
// does not return an error.
var err error
ret.barrierType, err = ret.Type(context.Background())
if err != nil {
return nil, err
}
return ret, nil
}
func (d *autoSeal) SealWrapable() bool {
return true
}
func (d *autoSeal) GetAccess() seal.Access {
Raft Storage Backend (#6888) * Work on raft backend * Add logstore locally * Add encryptor and unsealable interfaces * Add clustering support to raft * Remove client and handler * Bootstrap raft on init * Cleanup raft logic a bit * More raft work * Work on TLS config * More work on bootstrapping * Fix build * More work on bootstrapping * More bootstrapping work * fix build * Remove consul dep * Fix build * merged oss/master into raft-storage * Work on bootstrapping * Get bootstrapping to work * Clean up FMS and node-id * Update local node ID logic * Cleanup node-id change * Work on snapshotting * Raft: Add remove peer API (#906) * Add remove peer API * Add some comments * Fix existing snapshotting (#909) * Raft get peers API (#912) * Read raft configuration * address review feedback * Use the Leadership Transfer API to step-down the active node (#918) * Raft join and unseal using Shamir keys (#917) * Raft join using shamir * Store AEAD instead of master key * Split the raft join process to answer the challenge after a successful unseal * get the follower to standby state * Make unseal work * minor changes * Some input checks * reuse the shamir seal access instead of new default seal access * refactor joinRaftSendAnswer function * Synchronously send answer in auto-unseal case * Address review feedback * Raft snapshots (#910) * Fix existing snapshotting * implement the noop snapshotting * Add comments and switch log libraries * add some snapshot tests * add snapshot test file * add TODO * More work on raft snapshotting * progress on the ConfigStore strategy * Don't use two buckets * Update the snapshot store logic to hide the file logic * Add more backend tests * Cleanup code a bit * [WIP] Raft recovery (#938) * Add recovery functionality * remove fmt.Printfs * Fix a few fsm bugs * Add max size value for raft backend (#942) * Add max size value for raft backend * Include physical.ErrValueTooLarge in the message * Raft snapshot Take/Restore API (#926) * Inital work on raft snapshot APIs * Always redirect snapshot install/download requests * More work on the snapshot APIs * Cleanup code a bit * On restore handle special cases * Use the seal to encrypt the sha sum file * Add sealer mechanism and fix some bugs * Call restore while state lock is held * Send restore cb trigger through raft log * Make error messages nicer * Add test helpers * Add snapshot test * Add shamir unseal test * Add more raft snapshot API tests * Fix locking * Change working to initalize * Add underlying raw object to test cluster core * Move leaderUUID to core * Add raft TLS rotation logic (#950) * Add TLS rotation logic * Cleanup logic a bit * Add/Remove from follower state on add/remove peer * add comments * Update more comments * Update request_forwarding_service.proto * Make sure we populate all nodes in the followerstate obj * Update times * Apply review feedback * Add more raft config setting (#947) * Add performance config setting * Add more config options and fix tests * Test Raft Recovery (#944) * Test raft recovery * Leave out a node during recovery * remove unused struct * Update physical/raft/snapshot_test.go * Update physical/raft/snapshot_test.go * fix vendoring * Switch to new raft interface * Remove unused files * Switch a gogo -> proto instance * Remove unneeded vault dep in go.sum * Update helper/testhelpers/testhelpers.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * Update vault/cluster/cluster.go * track active key within the keyring itself (#6915) * track active key within the keyring itself * lookup and store using the active key ID * update docstring * minor refactor * Small text fixes (#6912) * Update physical/raft/raft.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * review feedback * Move raft logical system into separate file * Update help text a bit * Enforce cluster addr is set and use it for raft bootstrapping * Fix tests * fix http test panic * Pull in latest raft-snapshot library * Add comment
2019-06-20 19:14:58 +00:00
return d.Access
}
func (d *autoSeal) checkCore() error {
if d.core == nil {
return fmt.Errorf("seal does not have a core set")
}
return nil
}
func (d *autoSeal) SetCore(core *Core) {
d.core = core
if d.logger == nil {
d.logger = d.core.Logger().Named("autoseal")
d.core.AddLogger(d.logger)
}
}
func (d *autoSeal) Init(ctx context.Context) error {
return d.Access.Init(ctx)
}
func (d *autoSeal) Finalize(ctx context.Context) error {
return d.Access.Finalize(ctx)
}
func (d *autoSeal) BarrierType() wrapping.WrapperType {
return d.barrierType
}
func (d *autoSeal) GetShamirWrapper() (*aeadwrapper.ShamirWrapper, error) {
return nil, fmt.Errorf("autoSeal does not use a ShamirWrapper")
}
func (d *autoSeal) StoredKeysSupported() seal.StoredKeysSupport {
return seal.StoredKeysSupportedGeneric
}
func (d *autoSeal) RecoveryKeySupported() bool {
return true
}
// SetStoredKeys uses the autoSeal.Access.Encrypts method to wrap the keys. The stored entry
// does not need to be seal wrapped in this case.
func (d *autoSeal) SetStoredKeys(ctx context.Context, keys [][]byte) error {
return writeStoredKeys(ctx, d.core.physical, d.Access, keys)
}
// GetStoredKeys retrieves the key shares by unwrapping the encrypted key using the
// autoseal.
func (d *autoSeal) GetStoredKeys(ctx context.Context) ([][]byte, error) {
return readStoredKeys(ctx, d.core.physical, d.Access)
}
func (d *autoSeal) upgradeStoredKeys(ctx context.Context) error {
pe, err := d.core.physical.Get(ctx, StoredBarrierKeysPath)
if err != nil {
return fmt.Errorf("failed to fetch stored keys: %w", err)
}
if pe == nil {
return fmt.Errorf("no stored keys found")
}
blobInfo := &wrapping.BlobInfo{}
if err := proto.Unmarshal(pe.Value, blobInfo); err != nil {
return fmt.Errorf("failed to proto decode stored keys: %w", err)
}
keyId, err := d.Access.KeyId(ctx)
if err != nil {
return err
}
if blobInfo.KeyInfo != nil && blobInfo.KeyInfo.KeyId != keyId {
d.logger.Info("upgrading stored keys")
pt, err := d.Decrypt(ctx, blobInfo, nil)
if err != nil {
return fmt.Errorf("failed to decrypt encrypted stored keys: %w", err)
}
// Decode the barrier entry
var keys [][]byte
if err := json.Unmarshal(pt, &keys); err != nil {
return fmt.Errorf("failed to decode stored keys: %w", err)
}
if err := d.SetStoredKeys(ctx, keys); err != nil {
return fmt.Errorf("failed to save upgraded stored keys: %w", err)
}
}
return nil
}
// UpgradeKeys re-encrypts and saves the stored keys and the recovery key
// with the current key if the current KeyId is different from the KeyId
// the stored keys and the recovery key are encrypted with. The provided
// Context must be non-nil.
func (d *autoSeal) UpgradeKeys(ctx context.Context) error {
// Many of the seals update their keys to the latest KeyId when Encrypt
// is called.
if _, err := d.Encrypt(ctx, []byte("a"), nil); err != nil {
return err
}
if err := d.upgradeRecoveryKey(ctx); err != nil {
return err
}
if err := d.upgradeStoredKeys(ctx); err != nil {
return err
}
return nil
}
func (d *autoSeal) BarrierConfig(ctx context.Context) (*SealConfig, error) {
if d.barrierConfig.Load().(*SealConfig) != nil {
return d.barrierConfig.Load().(*SealConfig).Clone(), nil
}
if err := d.checkCore(); err != nil {
return nil, err
}
sealType := "barrier"
entry, err := d.core.physical.Get(ctx, barrierSealConfigPath)
if err != nil {
d.logger.Error("failed to read seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("failed to read %q seal configuration: %w", sealType, err)
}
// If the seal configuration is missing, we are not initialized
if entry == nil {
if d.logger.IsInfo() {
d.logger.Info("seal configuration missing, not initialized", "seal_type", sealType)
}
return nil, nil
}
conf := &SealConfig{}
err = json.Unmarshal(entry.Value, conf)
if err != nil {
d.logger.Error("failed to decode seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("failed to decode %q seal configuration: %w", sealType, err)
}
// Check for a valid seal configuration
if err := conf.Validate(); err != nil {
d.logger.Error("invalid seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("%q seal validation failed: %w", sealType, err)
}
barrierTypeUpgradeCheck(d.BarrierType(), conf)
if conf.Type != d.BarrierType().String() {
d.logger.Error("barrier seal type does not match loaded type", "seal_type", conf.Type, "loaded_type", d.BarrierType())
return nil, fmt.Errorf("barrier seal type of %q does not match loaded type of %q", conf.Type, d.BarrierType())
}
d.SetCachedBarrierConfig(conf)
return conf.Clone(), nil
}
func (d *autoSeal) SetBarrierConfig(ctx context.Context, conf *SealConfig) error {
if err := d.checkCore(); err != nil {
return err
}
if conf == nil {
d.barrierConfig.Store((*SealConfig)(nil))
return nil
}
conf.Type = d.BarrierType().String()
// Encode the seal configuration
buf, err := json.Marshal(conf)
if err != nil {
return fmt.Errorf("failed to encode barrier seal configuration: %w", err)
}
// Store the seal configuration
pe := &physical.Entry{
Key: barrierSealConfigPath,
Value: buf,
}
if err := d.core.physical.Put(ctx, pe); err != nil {
d.logger.Error("failed to write barrier seal configuration", "error", err)
return fmt.Errorf("failed to write barrier seal configuration: %w", err)
}
d.SetCachedBarrierConfig(conf.Clone())
return nil
}
2018-10-23 06:34:02 +00:00
func (d *autoSeal) SetCachedBarrierConfig(config *SealConfig) {
d.barrierConfig.Store(config)
}
func (d *autoSeal) RecoveryType() string {
return RecoveryTypeShamir
}
// RecoveryConfig returns the recovery config on recoverySealConfigPlaintextPath.
func (d *autoSeal) RecoveryConfig(ctx context.Context) (*SealConfig, error) {
if d.recoveryConfig.Load().(*SealConfig) != nil {
return d.recoveryConfig.Load().(*SealConfig).Clone(), nil
}
if err := d.checkCore(); err != nil {
return nil, err
}
sealType := "recovery"
var entry *physical.Entry
var err error
entry, err = d.core.physical.Get(ctx, recoverySealConfigPlaintextPath)
if err != nil {
d.logger.Error("failed to read seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("failed to read %q seal configuration: %w", sealType, err)
}
if entry == nil {
if d.core.Sealed() {
d.logger.Info("seal configuration missing, but cannot check old path as core is sealed", "seal_type", sealType)
return nil, nil
}
// Check the old recovery seal config path so an upgraded standby will
// return the correct seal config
be, err := d.core.barrier.Get(ctx, recoverySealConfigPath)
if err != nil {
return nil, fmt.Errorf("failed to read old recovery seal configuration: %w", err)
}
// If the seal configuration is missing, then we are not initialized.
if be == nil {
if d.logger.IsInfo() {
d.logger.Info("seal configuration missing, not initialized", "seal_type", sealType)
}
return nil, nil
}
// Reconstruct the physical entry
entry = &physical.Entry{
Key: be.Key,
Value: be.Value,
}
}
conf := &SealConfig{}
if err := json.Unmarshal(entry.Value, conf); err != nil {
d.logger.Error("failed to decode seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("failed to decode %q seal configuration: %w", sealType, err)
}
// Check for a valid seal configuration
if err := conf.Validate(); err != nil {
d.logger.Error("invalid seal configuration", "seal_type", sealType, "error", err)
return nil, fmt.Errorf("%q seal validation failed: %w", sealType, err)
}
if conf.Type != d.RecoveryType() {
d.logger.Error("recovery seal type does not match loaded type", "seal_type", conf.Type, "loaded_type", d.RecoveryType())
return nil, fmt.Errorf("recovery seal type of %q does not match loaded type of %q", conf.Type, d.RecoveryType())
}
d.recoveryConfig.Store(conf)
return conf.Clone(), nil
}
// SetRecoveryConfig writes the recovery configuration to the physical storage
// and sets it as the seal's recoveryConfig.
func (d *autoSeal) SetRecoveryConfig(ctx context.Context, conf *SealConfig) error {
if err := d.checkCore(); err != nil {
return err
}
// Perform migration if applicable
if err := d.migrateRecoveryConfig(ctx); err != nil {
return err
}
if conf == nil {
d.recoveryConfig.Store((*SealConfig)(nil))
return nil
}
conf.Type = d.RecoveryType()
// Encode the seal configuration
buf, err := json.Marshal(conf)
if err != nil {
return fmt.Errorf("failed to encode recovery seal configuration: %w", err)
}
// Store the seal configuration directly in the physical storage
pe := &physical.Entry{
Key: recoverySealConfigPlaintextPath,
Value: buf,
}
if err := d.core.physical.Put(ctx, pe); err != nil {
d.logger.Error("failed to write recovery seal configuration", "error", err)
return fmt.Errorf("failed to write recovery seal configuration: %w", err)
}
d.recoveryConfig.Store(conf.Clone())
return nil
}
2018-10-23 06:34:02 +00:00
func (d *autoSeal) SetCachedRecoveryConfig(config *SealConfig) {
d.recoveryConfig.Store(config)
}
func (d *autoSeal) VerifyRecoveryKey(ctx context.Context, key []byte) error {
if key == nil {
return fmt.Errorf("recovery key to verify is nil")
}
pt, err := d.getRecoveryKeyInternal(ctx)
if err != nil {
2019-02-01 19:29:55 +00:00
return err
}
if subtle.ConstantTimeCompare(key, pt) != 1 {
return fmt.Errorf("recovery key does not match submitted values")
}
return nil
}
func (d *autoSeal) SetRecoveryKey(ctx context.Context, key []byte) error {
if err := d.checkCore(); err != nil {
return err
}
if key == nil {
return fmt.Errorf("recovery key to store is nil")
}
// Encrypt and marshal the keys
blobInfo, err := d.Encrypt(ctx, key, nil)
if err != nil {
return fmt.Errorf("failed to encrypt keys for storage: %w", err)
}
value, err := proto.Marshal(blobInfo)
if err != nil {
return fmt.Errorf("failed to marshal value for storage: %w", err)
}
be := &physical.Entry{
Key: recoveryKeyPath,
Value: value,
}
if err := d.core.physical.Put(ctx, be); err != nil {
d.logger.Error("failed to write recovery key", "error", err)
return fmt.Errorf("failed to write recovery key: %w", err)
}
return nil
}
func (d *autoSeal) RecoveryKey(ctx context.Context) ([]byte, error) {
return d.getRecoveryKeyInternal(ctx)
}
func (d *autoSeal) getRecoveryKeyInternal(ctx context.Context) ([]byte, error) {
pe, err := d.core.physical.Get(ctx, recoveryKeyPath)
if err != nil {
d.logger.Error("failed to read recovery key", "error", err)
return nil, fmt.Errorf("failed to read recovery key: %w", err)
}
if pe == nil {
d.logger.Warn("no recovery key found")
return nil, fmt.Errorf("no recovery key found")
}
blobInfo := &wrapping.BlobInfo{}
if err := proto.Unmarshal(pe.Value, blobInfo); err != nil {
return nil, fmt.Errorf("failed to proto decode stored keys: %w", err)
}
pt, err := d.Decrypt(ctx, blobInfo, nil)
if err != nil {
return nil, fmt.Errorf("failed to decrypt encrypted stored keys: %w", err)
}
return pt, nil
}
func (d *autoSeal) upgradeRecoveryKey(ctx context.Context) error {
pe, err := d.core.physical.Get(ctx, recoveryKeyPath)
if err != nil {
return fmt.Errorf("failed to fetch recovery key: %w", err)
}
if pe == nil {
return fmt.Errorf("no recovery key found")
}
blobInfo := &wrapping.BlobInfo{}
if err := proto.Unmarshal(pe.Value, blobInfo); err != nil {
return fmt.Errorf("failed to proto decode recovery key: %w", err)
}
keyId, err := d.Access.KeyId(ctx)
if err != nil {
return err
}
if blobInfo.KeyInfo != nil && blobInfo.KeyInfo.KeyId != keyId {
d.logger.Info("upgrading recovery key")
pt, err := d.Decrypt(ctx, blobInfo, nil)
if err != nil {
return fmt.Errorf("failed to decrypt encrypted recovery key: %w", err)
}
if err := d.SetRecoveryKey(ctx, pt); err != nil {
return fmt.Errorf("failed to save upgraded recovery key: %w", err)
}
}
return nil
}
// migrateRecoveryConfig is a helper func to migrate the recovery config to
// live outside the barrier. This is called from SetRecoveryConfig which is
// always called with the stateLock.
func (d *autoSeal) migrateRecoveryConfig(ctx context.Context) error {
// Get config from the old recoverySealConfigPath path
be, err := d.core.barrier.Get(ctx, recoverySealConfigPath)
if err != nil {
return fmt.Errorf("failed to read old recovery seal configuration during migration: %w", err)
}
// If this entry is nil, then skip migration
if be == nil {
return nil
}
// Only log if we are performing the migration
d.logger.Debug("migrating recovery seal configuration")
defer d.logger.Debug("done migrating recovery seal configuration")
// Perform migration
pe := &physical.Entry{
Key: recoverySealConfigPlaintextPath,
Value: be.Value,
}
if err := d.core.physical.Put(ctx, pe); err != nil {
return fmt.Errorf("failed to write recovery seal configuration during migration: %w", err)
}
// Perform deletion of the old entry
if err := d.core.barrier.Delete(ctx, recoverySealConfigPath); err != nil {
return fmt.Errorf("failed to delete old recovery seal configuration during migration: %w", err)
}
return nil
}
// StartHealthCheck starts a goroutine that tests the health of the auto-unseal backend once every 10 minutes.
// If unhealthy, logs a warning on the condition and begins testing every one minute until healthy again.
func (d *autoSeal) StartHealthCheck() {
d.StopHealthCheck()
d.hcLock.Lock()
defer d.hcLock.Unlock()
healthCheck := time.NewTicker(sealHealthTestIntervalNominal)
d.healthCheckStop = make(chan struct{})
healthCheckStop := d.healthCheckStop
ctx := d.core.activeContext
go func() {
lastTestOk := true
lastSeenOk := time.Now()
fail := func(msg string, args ...interface{}) {
d.logger.Warn(msg, args...)
if lastTestOk {
healthCheck.Reset(sealHealthTestIntervalUnhealthy)
}
lastTestOk = false
d.core.MetricSink().SetGauge(autoSealUnavailableDuration, float32(time.Since(lastSeenOk).Milliseconds()))
}
for {
select {
case <-healthCheckStop:
if healthCheck != nil {
healthCheck.Stop()
}
healthCheckStop = nil
return
case t := <-healthCheck.C:
func() {
ctx, cancel := context.WithTimeout(ctx, sealHealthTestTimeout)
defer cancel()
testVal := fmt.Sprintf("Heartbeat %d", mathrand.Intn(1000))
ciphertext, err := d.Access.Encrypt(ctx, []byte(testVal), nil)
if err != nil {
fail("failed to encrypt seal health test value, seal backend may be unreachable", "error", err)
} else {
func() {
ctx, cancel := context.WithTimeout(ctx, sealHealthTestTimeout)
defer cancel()
plaintext, err := d.Access.Decrypt(ctx, ciphertext, nil)
if err != nil {
fail("failed to decrypt seal health test value, seal backend may be unreachable", "error", err)
}
if !bytes.Equal([]byte(testVal), plaintext) {
fail("seal health test value failed to decrypt to expected value")
} else {
d.logger.Debug("seal health test passed")
if !lastTestOk {
d.logger.Info("seal backend is now healthy again", "downtime", t.Sub(lastSeenOk).String())
healthCheck.Reset(sealHealthTestIntervalNominal)
}
lastTestOk = true
lastSeenOk = t
d.core.MetricSink().SetGauge(autoSealUnavailableDuration, 0)
}
}()
}
}()
}
}
}()
}
func (d *autoSeal) StopHealthCheck() {
d.hcLock.Lock()
defer d.hcLock.Unlock()
if d.healthCheckStop != nil {
close(d.healthCheckStop)
d.healthCheckStop = nil
}
}