// vault/core.go

package vault
import (
"context"
"crypto/ecdsa"
"crypto/rand"
"crypto/subtle"
"crypto/tls"
"crypto/x509"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
wrapping "github.com/hashicorp/go-kms-wrapping"
aeadwrapper "github.com/hashicorp/go-kms-wrapping/wrappers/aead"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-secure-stdlib/mlock"
"github.com/hashicorp/go-secure-stdlib/reloadutil"
"github.com/hashicorp/go-secure-stdlib/strutil"
"github.com/hashicorp/go-secure-stdlib/tlsutil"
"github.com/hashicorp/go-uuid"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/audit"
"github.com/hashicorp/vault/command/server"
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/certutil"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/helper/jsonutil"
"github.com/hashicorp/vault/sdk/helper/logging"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/sdk/physical"
"github.com/hashicorp/vault/sdk/version"
sr "github.com/hashicorp/vault/serviceregistration"
"github.com/hashicorp/vault/shamir"
"github.com/hashicorp/vault/vault/cluster"
"github.com/hashicorp/vault/vault/quotas"
vaultseal "github.com/hashicorp/vault/vault/seal"
"github.com/patrickmn/go-cache"
uberAtomic "go.uber.org/atomic"
"google.golang.org/grpc"
)
const (
// CoreLockPath is the path used to acquire a coordinating lock
// for a highly-available deploy.
CoreLockPath = "core/lock"
// The poison pill is used as a check during certain scenarios to indicate
// to standby nodes that they should seal
poisonPillPath = "core/poison-pill"
poisonPillDRPath = "core/poison-pill-dr"
// coreLeaderPrefix is the prefix used for the UUID that contains
// the currently elected leader.
coreLeaderPrefix = "core/leader/"
// coreKeyringCanaryPath is used as a canary to indicate to replicated
// clusters that they need to perform a rekey operation synchronously; this
// isn't keyring-canary to avoid ignoring it when ignoring core/keyring
coreKeyringCanaryPath = "core/canary-keyring"
indexHeaderHMACKeyPath = "core/index-header-hmac-key"
// defaultMFAAuthResponseTTL is the default duration that Vault caches the
// MfaAuthResponse when the value is not specified in the server config
defaultMFAAuthResponseTTL = 300 * time.Second
// ForwardSSCTokenToActive is the value that must be set in the
// forwardToActive to trigger forwarding if a perf standby encounters
// an SSC Token that it does not have the WAL state for.
ForwardSSCTokenToActive = "new_token"
)
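
// Illustrative sketch (not part of the original file): CoreLockPath is the
// key handed to the HA backend when competing for the active-node lock.
// Assuming the sdk/physical HABackend interface, acquisition looks roughly
// like this (haBackend, leaderUUID, and stopCh are placeholder names):
//
//	lock, err := haBackend.LockWith(CoreLockPath, leaderUUID)
//	if err != nil {
//		return err
//	}
//	lostCh, err := lock.Lock(stopCh) // blocks until held or stopCh closes
//	if err != nil {
//		return err
//	}
//	// the lock is held until lostCh is closed or lock.Unlock() is called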
var (
// ErrAlreadyInit is returned if the core is already
// initialized. This prevents a re-initialization.
ErrAlreadyInit = errors.New("Vault is already initialized")
// ErrNotInit is returned if a non-initialized barrier
// is attempted to be unsealed.
ErrNotInit = errors.New("Vault is not initialized")
// ErrInternalError is returned when we don't want to leak
// any information about an internal error
ErrInternalError = errors.New("internal error")
// ErrHANotEnabled is returned if the operation only makes sense
// in an HA setting
ErrHANotEnabled = errors.New("Vault is not configured for highly-available mode")
// manualStepDownSleepPeriod is how long to sleep after a user-initiated
// step down of the active node, to prevent instantly regrabbing the lock.
// It's var not const so that tests can manipulate it.
manualStepDownSleepPeriod = 10 * time.Second
// Functions only in the Enterprise version
enterprisePostUnseal = enterprisePostUnsealImpl
enterprisePreSeal = enterprisePreSealImpl
enterpriseSetupFilteredPaths = enterpriseSetupFilteredPathsImpl
enterpriseSetupQuotas = enterpriseSetupQuotasImpl
enterpriseSetupAPILock = setupAPILockImpl
startReplication = startReplicationImpl
stopReplication = stopReplicationImpl
LastWAL = lastWALImpl
LastPerformanceWAL = lastPerformanceWALImpl
LastDRWAL = lastDRWALImpl
PerformanceMerkleRoot = merkleRootImpl
DRMerkleRoot = merkleRootImpl
LastRemoteWAL = lastRemoteWALImpl
LastRemoteUpstreamWAL = lastRemoteUpstreamWALImpl
WaitUntilWALShipped = waitUntilWALShippedImpl
storedLicenseCheck = func(c *Core, conf *CoreConfig) error { return nil }
LicenseAutoloaded = func(*Core) bool { return false }
LicenseInitCheck = func(*Core) error { return nil }
LicenseSummary = func(*Core) (*LicenseState, error) { return nil, nil }
LicenseReload = func(*Core) error { return nil }
)
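
// The function variables above are OSS defaults that Enterprise builds swap
// out. A hedged sketch of what an OSS stub looks like (the signature is
// assumed for illustration; the real stubs live elsewhere in this package):
//
//	func enterprisePostUnsealImpl(c *Core) error { return nil } // no-op in OSS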
// NonFatalError is an error that can be returned during NewCore that should be
// displayed but not cause a program exit
type NonFatalError struct {
Err error
}
func (e *NonFatalError) WrappedErrors() []error {
return []error{e.Err}
}
func (e *NonFatalError) Error() string {
return e.Err.Error()
}
// NewNonFatalError returns a new non-fatal error.
func NewNonFatalError(err error) *NonFatalError {
return &NonFatalError{Err: err}
}
// IsFatalError returns true if the given error is a fatal error.
func IsFatalError(err error) bool {
return !errwrap.ContainsType(err, new(NonFatalError))
}
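
// Illustrative usage (not from the original source): callers of NewCore can
// use IsFatalError to decide whether an error should abort startup:
//
//	core, err := NewCore(conf)
//	if err != nil && IsFatalError(err) {
//		return nil, err // fatal: do not continue
//	}
//	// a NonFatalError is reported to the operator but the core is usable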
// ErrInvalidKey is returned if there is a user-based error with a provided
// unseal key. This will be shown to the user, so should not contain
// information that is sensitive.
type ErrInvalidKey struct {
Reason string
}
func (e *ErrInvalidKey) Error() string {
return fmt.Sprintf("invalid key: %v", e.Reason)
}
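
// A hypothetical sketch (not in the original file) of how unseal-key
// validation might return ErrInvalidKey with a user-safe reason;
// minKeyLength is assumed for illustration:
//
//	if len(key) < minKeyLength {
//		return &ErrInvalidKey{Reason: fmt.Sprintf("key is shorter than minimum %d bytes", minKeyLength)}
//	}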
type RegisterAuthFunc func(context.Context, time.Duration, string, *logical.Auth) error
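
// An illustrative function matching RegisterAuthFunc's shape (all names here
// are hypothetical, not from the original source):
//
//	var registerAuth RegisterAuthFunc = func(ctx context.Context, tokenTTL time.Duration, mountPath string, auth *logical.Auth) error {
//		// create and persist the token for auth with the given TTL,
//		// scoped to the auth mount at mountPath
//		return nil
//	}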
type activeAdvertisement struct {
RedirectAddr string `json:"redirect_addr"`
ClusterAddr string `json:"cluster_addr,omitempty"`
ClusterCert []byte `json:"cluster_cert,omitempty"`
ClusterKeyParams *certutil.ClusterKeyParams `json:"cluster_key_params,omitempty"`
}
type unlockInformation struct {
Parts [][]byte
Nonce string
}
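
// unlockInformation accumulates key shares across successive Unseal calls. A
// hedged sketch of the threshold check (the real logic lives later in this
// file):
//
//	c.unlockInfo.Parts = append(c.unlockInfo.Parts, key)
//	if len(c.unlockInfo.Parts) < int(config.SecretThreshold) {
//		return false, nil // not enough shares yet
//	}
//	masterKey, err := shamir.Combine(c.unlockInfo.Parts)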
type raftInformation struct {
challenge *wrapping.EncryptedBlobInfo
leaderClient *api.Client
leaderBarrierConfig *SealConfig
nonVoter bool
joinInProgress bool
}
type migrationInformation struct {
// seal to use during a migration operation. It is the
// seal we're migrating *from*.
seal Seal
// unsealKey was the unseal key provided for the migration seal.
// This will be set as the recovery key when migrating from shamir to auto-seal.
// We don't need to do anything with it when migrating auto->shamir because
// we don't store the shamir combined key for shamir seals, nor when
// migrating auto->auto because then the recovery key doesn't change.
unsealKey []byte
}
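
// A hedged sketch (not from the original file) of how unsealKey is consumed
// when migrating shamir -> auto: the old combined key is stored as the new
// seal's recovery key, assuming the Seal interface's SetRecoveryKey method:
//
//	if err := c.seal.SetRecoveryKey(ctx, c.migrationInfo.unsealKey); err != nil {
//		return err
//	}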
// Core is used as the central manager of Vault activity. It is the primary point of
// interface for API handlers and is responsible for managing the logical and physical
// backends, router, security barrier, and audit trails.
type Core struct {
entCore
// The registry of builtin plugins is passed in here as an interface because
// if it's used directly, it results in import cycles.
builtinRegistry BuiltinRegistry
// N.B.: This is used to populate a dev token down replication, as
// otherwise, after replication is started, a dev would have to go through
// the generate-root process simply to talk to the new follower cluster.
devToken string
// HABackend may be available depending on the physical backend
ha physical.HABackend
// storageType is the storage type set in the storage configuration
storageType string
// redirectAddr is the address we advertise as leader if held
redirectAddr string
// clusterAddr is the address we use for clustering
clusterAddr *atomic.Value
// physical backend is the un-trusted backend with durable data
physical physical.Backend
// serviceRegistration is the ServiceRegistration network
serviceRegistration sr.ServiceRegistration
// underlyingPhysical will always point to the underlying backend
// implementation. This is an un-trusted backend with durable data
underlyingPhysical physical.Backend
// seal is our seal, for seal configuration information
seal Seal
// raftJoinDoneCh is used by the raft retry join routine to inform the unseal
// process that the join is complete
raftJoinDoneCh chan struct{}
// postUnsealStarted informs the raft retry join routine that unseal key
// validation is completed and post unseal has started so that it can complete
// the join process when Shamir seal is in use
postUnsealStarted *uint32
// raftInfo will contain information required for this node to join as a
// peer to an existing raft cluster
raftInfo *raftInformation
// migrationInfo is used during (and possibly after) a seal migration.
// This contains information about the seal we are migrating *from*. Even
// post seal migration, provided the old seal is still in configuration,
// migrationInfo will be populated, which on enterprise may be necessary for
// seal rewrap.
migrationInfo *migrationInformation
sealMigrationDone *uint32
// barrier is the security barrier wrapping the physical backend
barrier SecurityBarrier
// router is responsible for managing the mount points for logical backends.
router *Router
// logicalBackends is the mapping of backends to use for this core
logicalBackends map[string]logical.Factory
// credentialBackends is the mapping of backends to use for this core
credentialBackends map[string]logical.Factory
// auditBackends is the mapping of backends to use for this core
auditBackends map[string]audit.Factory
// stateLock protects mutable state
stateLock DeadlockRWMutex
sealed *uint32
standby bool
perfStandby bool
standbyDoneCh chan struct{}
standbyStopCh *atomic.Value
manualStepDownCh chan struct{}
keepHALockOnStepDown *uint32
heldHALock physical.Lock
// shutdownDoneCh is used to notify when Shutdown() completes
shutdownDoneCh chan struct{}
// unlockInfo has the keys provided to Unseal until the threshold number of parts is available, as well as the operation nonce
unlockInfo *unlockInformation
// generateRootProgress holds the shares until we reach enough
// to verify the master key
generateRootConfig *GenerateRootConfig
generateRootProgress [][]byte
generateRootLock sync.Mutex
// These variables hold the config and shares we have until we reach
// enough to verify the appropriate master key. Note that the same lock is
// used; this isn't time-critical so this shouldn't be a problem.
barrierRekeyConfig *SealConfig
recoveryRekeyConfig *SealConfig
rekeyLock sync.RWMutex
// mounts is loaded after unseal since it is a protected
// configuration
mounts *MountTable
// mountsLock is used to ensure that the mounts table does not
// change underneath a calling function
mountsLock sync.RWMutex
// mountMigrationTracker tracks past and ongoing remount operations
// against their migration ids
mountMigrationTracker *sync.Map
// auth is loaded after unseal since it is a protected
// configuration
auth *MountTable
// authLock is used to ensure that the auth table does not
// change underneath a calling function
authLock sync.RWMutex
// audit is loaded after unseal since it is a protected
// configuration
audit *MountTable
// auditLock is used to ensure that the audit table does not
// change underneath a calling function
auditLock sync.RWMutex
// auditBroker is used to ingest the audit events and fan
// out into the configured audit backends
auditBroker *AuditBroker
// auditedHeaders is used to configure which http headers
// can be output in the audit logs
auditedHeaders *AuditedHeadersConfig
// systemBackend is the backend which is used to manage internal operations
systemBackend *SystemBackend
loginMFABackend *LoginMFABackend
// cubbyholeBackend is the backend which manages the per-token storage
cubbyholeBackend *CubbyholeBackend
// systemBarrierView is the barrier view for the system backend
systemBarrierView *BarrierView
// expiration manager is used for managing LeaseIDs,
// renewal, expiration and revocation
expiration *ExpirationManager
// rollback manager is used to run rollbacks periodically
rollback *RollbackManager
// policy store is used to manage named ACL policies
policyStore *PolicyStore
// token store is used to manage authentication tokens
tokenStore *TokenStore
// identityStore is used to manage client entities
identityStore *IdentityStore
// activityLog is used to track active client count
activityLog *ActivityLog
// metricsCh is used to stop the metrics streaming
metricsCh chan struct{}
// metricsMutex is used to prevent a race condition between
// metrics emission and sealing leading to a nil pointer
metricsMutex sync.Mutex
// inFlightReqMap is used to store info about in-flight requests
inFlightReqData *InFlightRequests
// mfaResponseAuthQueue is used to cache the auth response per request ID
mfaResponseAuthQueue *LoginMFAPriorityQueue
mfaResponseAuthQueueLock sync.Mutex
// metricSink is the destination for all metrics that have
// a cluster label.
metricSink *metricsutil.ClusterMetricSink
defaultLeaseTTL time.Duration
maxLeaseTTL time.Duration
// baseLogger is used to avoid ResetNamed as it strips useful prefixes in
// e.g. testing
baseLogger log.Logger
logger log.Logger
// Disables the trace display for Sentinel checks
sentinelTraceDisabled bool
// cachingDisabled indicates whether caches are disabled
cachingDisabled bool
// Cache stores the actual cache; we always have this but may bypass it if
// disabled
physicalCache physical.ToggleablePurgemonster
// logRequestsLevel indicates at which level requests should be logged
logRequestsLevel *uberAtomic.Int32
// reloadFuncs is a map containing reload functions
reloadFuncs map[string][]reloadutil.ReloadFunc
// reloadFuncsLock controls access to the funcs
reloadFuncsLock sync.RWMutex
// wrappingJWTKey is the key used for generating JWTs containing response
// wrapping information
wrappingJWTKey *ecdsa.PrivateKey
//
// Cluster information
//
// Name
clusterName string
// ID
clusterID uberAtomic.String
// Specific cipher suites to use for clustering, if any
clusterCipherSuites []uint16
// Used to modify cluster parameters
clusterParamsLock sync.RWMutex
// The private key stored in the barrier used for establishing
// mutually-authenticated connections between Vault cluster members
localClusterPrivateKey *atomic.Value
// The local cluster cert
localClusterCert *atomic.Value
// The parsed form of the local cluster cert
localClusterParsedCert *atomic.Value
// The TCP addresses we should use for clustering
clusterListenerAddrs []*net.TCPAddr
// The handler to use for request forwarding
clusterHandler http.Handler
// Write lock used to ensure that we don't have multiple connections adjust
// this value at the same time
requestForwardingConnectionLock sync.RWMutex
// Lock for the leader values, ensuring we don't run the parts of Leader()
// that change things concurrently
leaderParamsLock sync.RWMutex
// Current cluster leader values
clusterLeaderParams *atomic.Value
// Info on cluster members
clusterPeerClusterAddrsCache *cache.Cache
// The context for the client
rpcClientConnContext context.Context
// The function for canceling the client connection
rpcClientConnCancelFunc context.CancelFunc
// The grpc ClientConn for RPC calls
rpcClientConn *grpc.ClientConn
// The grpc forwarding client
rpcForwardingClient *forwardingClient
// The UUID used to hold the leader lock. Only set on active node
leaderUUID string
// CORS Information
corsConfig *CORSConfig
// The active set of upstream cluster addresses; stored via the Echo
// mechanism, loaded by the balancer
atomicPrimaryClusterAddrs *atomic.Value
atomicPrimaryFailoverAddrs *atomic.Value
// replicationState keeps the current replication state cached for quick
// lookup; activeNodeReplicationState stores the active value on standbys
replicationState *uint32
activeNodeReplicationState *uint32
// uiConfig contains UI configuration
uiConfig *UIConfig
// rawEnabled indicates whether the Raw endpoint is enabled
rawEnabled bool
// pluginDirectory is the location vault will look for plugin binaries
pluginDirectory string
// pluginCatalog is used to manage plugin configurations
pluginCatalog *PluginCatalog
enableMlock bool
// This can be used to trigger operations to stop running when Vault is
// going to be shut down, stepped down, or sealed
activeContext context.Context
activeContextCancelFunc *atomic.Value
// Stores the sealunwrapper for downgrade needs
sealUnwrapper physical.Backend
// unsealWithStoredKeysLock is a mutex that prevents multiple processes from
// unsealing with stored keys at the same time.
unsealWithStoredKeysLock sync.Mutex
// Stores any funcs that should be run on successful postUnseal
postUnsealFuncs []func()
// Stores any funcs that should be run on successful barrier unseal in
// recovery mode
postRecoveryUnsealFuncs []func() error
// replicationFailure is used to mark when replication has entered an
// unrecoverable failure.
replicationFailure *uint32
// disablePerfStandby is used to tell a standby not to attempt to become a
// perf standby
disablePerfStandby bool
licensingStopCh chan struct{}
// Stores loggers so we can reset the level
allLoggers []log.Logger
allLoggersLock sync.RWMutex
// Can be toggled atomically to cause the core to never try to become
// active, or give up active as soon as it gets it
neverBecomeActive *uint32
// loadCaseSensitiveIdentityStore enforces the loading of identity store
// artifacts in a case sensitive manner. To be used only in testing.
loadCaseSensitiveIdentityStore bool
// clusterListener starts up and manages connections on the cluster ports
clusterListener *atomic.Value
// customListenerHeader holds custom response headers for a listener
customListenerHeader *atomic.Value
// Telemetry objects
metricsHelper *metricsutil.MetricsHelper
// raftFollowerStates tracks information about all the raft follower nodes.
raftFollowerStates *raft.FollowerStates
// Stop channel for raft TLS rotations
raftTLSRotationStopCh chan struct{}
// Stores the pending peers we are waiting to give answers
pendingRaftPeers *sync.Map
// rawConfig stores the config as-is from the provided server configuration.
rawConfig *atomic.Value
coreNumber int
// secureRandomReader is the reader used for CSP operations
secureRandomReader io.Reader
recoveryMode bool
clusterNetworkLayer cluster.NetworkLayer
// PR1103disabled is used to test upgrade workflows: when set to true,
// the correct behaviour for namespaced cubbyholes is disabled, so we
// can test an upgrade to a version that includes the fixes from
// https://github.com/hashicorp/vault-enterprise/pull/1103
PR1103disabled bool
quotaManager *quotas.Manager
clusterHeartbeatInterval time.Duration
activityLogConfig ActivityLogCoreConfig
// activeTime is set on active nodes indicating the time at which this node
// became active.
activeTime time.Time
// KeyRotateGracePeriod is how long we allow an upgrade path
// for standby instances before we delete the upgrade keys
keyRotateGracePeriod *int64
autoRotateCancel context.CancelFunc
// number of workers to use for lease revocation in the expiration manager
numExpirationWorkers int
IndexHeaderHMACKey uberAtomic.Value
// disableAutopilot is used to disable the autopilot subsystem in raft storage
disableAutopilot bool
// enable/disable identifying response headers
enableResponseHeaderHostname bool
enableResponseHeaderRaftNodeID bool
// disableSSCTokens is used to disable server side consistent token creation/usage
disableSSCTokens bool
// versionTimestamps is a map of vault versions to timestamps when the version
// was first run. Note that because perf standbys should be upgraded first, and
// only the active node will actually write the new version timestamp, a perf
// standby shouldn't rely on the stored version timestamps being present.
versionTimestamps map[string]time.Time
}
func (c *Core) HAState() consts.HAState {
switch {
case c.perfStandby:
return consts.PerfStandby
case c.standby:
return consts.Standby
default:
return consts.Active
}
}
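// A hedged usage sketch, not part of the original source: callers that need
// to route work away from standbys can branch on the returned consts.HAState,
// e.g.:
//
//	switch core.HAState() {
//	case consts.Active:
//		// handle the request locally
//	case consts.PerfStandby, consts.Standby:
//		// forward the request to the active node
//	}
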
// CoreConfig is used to parameterize a core
type CoreConfig struct {
entCoreConfig
DevToken string
BuiltinRegistry BuiltinRegistry
LogicalBackends map[string]logical.Factory
CredentialBackends map[string]logical.Factory
AuditBackends map[string]audit.Factory
Physical physical.Backend
StorageType string
// May be nil, which disables HA operations
HAPhysical physical.HABackend
ServiceRegistration sr.ServiceRegistration
// Seal is the configured seal, or if none is configured explicitly, a
// shamir seal. In migration scenarios this is the new seal.
Seal Seal
// Unwrap seal is the optional seal marked "disabled"; this is the old
// seal in migration scenarios.
UnwrapSeal Seal
SecureRandomReader io.Reader
Logger log.Logger
// Disables the trace display for Sentinel checks
DisableSentinelTrace bool
// Disables the LRU cache on the physical backend
DisableCache bool
// Disables mlock syscall
DisableMlock bool
// Custom cache size for the LRU cache on the physical backend, or zero for default
CacheSize int
// Set as the leader address for HA
RedirectAddr string
// Set as the cluster address for HA
ClusterAddr string
DefaultLeaseTTL time.Duration
MaxLeaseTTL time.Duration
ClusterName string
ClusterCipherSuites string
EnableUI bool
// Enable the raw endpoint
EnableRaw bool
PluginDirectory string
DisableSealWrap bool
RawConfig *server.Config
ReloadFuncs *map[string][]reloadutil.ReloadFunc
ReloadFuncsLock *sync.RWMutex
// Licensing
License string
LicensePath string
LicensingConfig *LicensingConfig
DisablePerformanceStandby bool
DisableIndexing bool
DisableKeyEncodingChecks bool
AllLoggers []log.Logger
// Telemetry objects
MetricsHelper *metricsutil.MetricsHelper
MetricSink *metricsutil.ClusterMetricSink
RecoveryMode bool
ClusterNetworkLayer cluster.NetworkLayer
ClusterHeartbeatInterval time.Duration
// Activity log controls
ActivityLogConfig ActivityLogCoreConfig
// number of workers to use for lease revocation in the expiration manager
NumExpirationWorkers int
// DisableAutopilot is used to disable autopilot subsystem in raft storage
DisableAutopilot bool
// Whether to send headers in the HTTP response showing hostname or raft node ID
EnableResponseHeaderHostname bool
EnableResponseHeaderRaftNodeID bool
// DisableSSCTokens is used to disable the use of server side consistent tokens
DisableSSCTokens bool
}
// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
// not exist.
func (c *CoreConfig) GetServiceRegistration() sr.ServiceRegistration {
// Check whether there is a ServiceRegistration explicitly configured
if c.ServiceRegistration != nil {
return c.ServiceRegistration
}
// Check if HAPhysical is configured and implements ServiceRegistration
if c.HAPhysical != nil && c.HAPhysical.HAEnabled() {
if disc, ok := c.HAPhysical.(sr.ServiceRegistration); ok {
return disc
}
}
// No service discovery is available.
return nil
}
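// A hedged sketch, not in the original source: when no explicit registration
// is configured, an HA backend that also implements sr.ServiceRegistration is
// picked up through the type assertion above. `haBackend` is hypothetical.
//
//	conf := &CoreConfig{HAPhysical: haBackend}
//	if reg := conf.GetServiceRegistration(); reg != nil {
//		// service registration is available for this deployment
//	}
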
// CreateCore conducts static validations on the Core Config
// and returns an uninitialized core.
func CreateCore(conf *CoreConfig) (*Core, error) {
if conf.HAPhysical != nil && conf.HAPhysical.HAEnabled() {
if conf.RedirectAddr == "" {
return nil, fmt.Errorf("missing API address, please set in configuration or via environment")
}
}
if conf.DefaultLeaseTTL == 0 {
conf.DefaultLeaseTTL = defaultLeaseTTL
}
if conf.MaxLeaseTTL == 0 {
conf.MaxLeaseTTL = maxLeaseTTL
}
if conf.DefaultLeaseTTL > conf.MaxLeaseTTL {
return nil, fmt.Errorf("cannot have DefaultLeaseTTL larger than MaxLeaseTTL")
}
// Validate the advertise addr if it's given to us
if conf.RedirectAddr != "" {
u, err := url.Parse(conf.RedirectAddr)
if err != nil {
return nil, fmt.Errorf("redirect address is not valid url: %w", err)
}
if u.Scheme == "" {
return nil, fmt.Errorf("redirect address must include scheme (ex. 'http')")
}
}
// Make a default logger if not provided
if conf.Logger == nil {
conf.Logger = logging.NewVaultLogger(log.Trace)
}
// Make a default metric sink if not provided
if conf.MetricSink == nil {
conf.MetricSink = metricsutil.BlackholeSink()
}
// Instantiate a non-nil raw config if none is provided
if conf.RawConfig == nil {
conf.RawConfig = new(server.Config)
}
// secureRandomReader cannot be nil
if conf.SecureRandomReader == nil {
conf.SecureRandomReader = rand.Reader
}
clusterHeartbeatInterval := conf.ClusterHeartbeatInterval
if clusterHeartbeatInterval == 0 {
clusterHeartbeatInterval = 5 * time.Second
}
if conf.NumExpirationWorkers == 0 {
conf.NumExpirationWorkers = numExpirationWorkersDefault
}
// Setup the core
c := &Core{
entCore: entCore{},
devToken: conf.DevToken,
physical: conf.Physical,
serviceRegistration: conf.GetServiceRegistration(),
underlyingPhysical: conf.Physical,
storageType: conf.StorageType,
redirectAddr: conf.RedirectAddr,
clusterAddr: new(atomic.Value),
clusterListener: new(atomic.Value),
customListenerHeader: new(atomic.Value),
seal: conf.Seal,
router: NewRouter(),
sealed: new(uint32),
sealMigrationDone: new(uint32),
standby: true,
standbyStopCh: new(atomic.Value),
baseLogger: conf.Logger,
logger: conf.Logger.Named("core"),
defaultLeaseTTL: conf.DefaultLeaseTTL,
maxLeaseTTL: conf.MaxLeaseTTL,
sentinelTraceDisabled: conf.DisableSentinelTrace,
cachingDisabled: conf.DisableCache,
clusterName: conf.ClusterName,
clusterNetworkLayer: conf.ClusterNetworkLayer,
clusterPeerClusterAddrsCache: cache.New(3*clusterHeartbeatInterval, time.Second),
enableMlock: !conf.DisableMlock,
rawEnabled: conf.EnableRaw,
shutdownDoneCh: make(chan struct{}),
replicationState: new(uint32),
atomicPrimaryClusterAddrs: new(atomic.Value),
atomicPrimaryFailoverAddrs: new(atomic.Value),
localClusterPrivateKey: new(atomic.Value),
localClusterCert: new(atomic.Value),
localClusterParsedCert: new(atomic.Value),
activeNodeReplicationState: new(uint32),
keepHALockOnStepDown: new(uint32),
replicationFailure: new(uint32),
disablePerfStandby: true,
activeContextCancelFunc: new(atomic.Value),
allLoggers: conf.AllLoggers,
builtinRegistry: conf.BuiltinRegistry,
neverBecomeActive: new(uint32),
clusterLeaderParams: new(atomic.Value),
metricsHelper: conf.MetricsHelper,
metricSink: conf.MetricSink,
secureRandomReader: conf.SecureRandomReader,
rawConfig: new(atomic.Value),
recoveryMode: conf.RecoveryMode,
postUnsealStarted: new(uint32),
raftJoinDoneCh: make(chan struct{}),
clusterHeartbeatInterval: clusterHeartbeatInterval,
activityLogConfig: conf.ActivityLogConfig,
keyRotateGracePeriod: new(int64),
numExpirationWorkers: conf.NumExpirationWorkers,
raftFollowerStates: raft.NewFollowerStates(),
disableAutopilot: conf.DisableAutopilot,
enableResponseHeaderHostname: conf.EnableResponseHeaderHostname,
enableResponseHeaderRaftNodeID: conf.EnableResponseHeaderRaftNodeID,
mountMigrationTracker: &sync.Map{},
disableSSCTokens: conf.DisableSSCTokens,
}
c.standbyStopCh.Store(make(chan struct{}))
atomic.StoreUint32(c.sealed, 1)
c.metricSink.SetGaugeWithLabels([]string{"core", "unsealed"}, 0, nil)
c.allLoggers = append(c.allLoggers, c.logger)
c.router.logger = c.logger.Named("router")
c.allLoggers = append(c.allLoggers, c.router.logger)
c.inFlightReqData = &InFlightRequests{
InFlightReqMap: &sync.Map{},
InFlightReqCount: uberAtomic.NewUint64(0),
}
c.SetConfig(conf.RawConfig)
atomic.StoreUint32(c.replicationState, uint32(consts.ReplicationDRDisabled|consts.ReplicationPerformanceDisabled))
c.localClusterCert.Store(([]byte)(nil))
c.localClusterParsedCert.Store((*x509.Certificate)(nil))
c.localClusterPrivateKey.Store((*ecdsa.PrivateKey)(nil))
c.clusterLeaderParams.Store((*ClusterLeaderParams)(nil))
c.clusterAddr.Store(conf.ClusterAddr)
c.activeContextCancelFunc.Store((context.CancelFunc)(nil))
atomic.StoreInt64(c.keyRotateGracePeriod, int64(2*time.Minute))
switch conf.ClusterCipherSuites {
case "tls13", "tls12":
// Do nothing, let Go use the default
case "":
// Add in forward compatible TLS 1.3 suites, followed by handpicked 1.2 suites
c.clusterCipherSuites = []uint16{
// 1.3
tls.TLS_AES_128_GCM_SHA256,
tls.TLS_AES_256_GCM_SHA384,
tls.TLS_CHACHA20_POLY1305_SHA256,
// 1.2
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
}
default:
suites, err := tlsutil.ParseCiphers(conf.ClusterCipherSuites)
if err != nil {
return nil, fmt.Errorf("error parsing cluster cipher suites: %w", err)
}
c.clusterCipherSuites = suites
}
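// Illustrative only, not from the original source: a custom suite list is
// supplied as a comma-separated string of Go cipher suite names, e.g.
//
//	conf.ClusterCipherSuites = "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"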
// Load CORS config and provide a value for the core field.
c.corsConfig = &CORSConfig{
core: c,
Enabled: new(uint32),
}
if c.seal == nil {
c.seal = NewDefaultSeal(&vaultseal.Access{
Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{
Logger: c.logger.Named("shamir"),
}),
})
}
c.seal.SetCore(c)
return c, nil
}
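// A hedged usage sketch, not part of the original source: a minimal core can
// be built from a physical backend alone, with defaults covering the rest;
// `inmemBackend` is hypothetical.
//
//	c, err := CreateCore(&CoreConfig{
//		Physical: inmemBackend,
//		Logger:   logging.NewVaultLogger(log.Debug),
//	})
//	if err != nil {
//		return err
//	}
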
// NewCore is used to construct a new core
func NewCore(conf *CoreConfig) (*Core, error) {
var err error
c, err := CreateCore(conf)
if err != nil {
return nil, err
}
if err = coreInit(c, conf); err != nil {
return nil, err
}
if !conf.DisableMlock {
// Ensure our memory usage is locked into physical RAM
if err := mlock.LockMemory(); err != nil {
return nil, fmt.Errorf(
"Failed to lock memory: %v\n\n"+
"This usually means that the mlock syscall is not available.\n"+
"Vault uses mlock to prevent memory from being swapped to\n"+
"disk. This requires root privileges as well as a machine\n"+
"that supports mlock. Please enable mlock on your system or\n"+
"disable Vault from using it. To disable Vault from using it,\n"+
"set the `disable_mlock` configuration option in your configuration\n"+
"file.",
err)
}
}
// Construct a new AES-GCM barrier
c.barrier, err = NewAESGCMBarrier(c.physical)
if err != nil {
return nil, fmt.Errorf("barrier setup failed: %w", err)
}
if err := storedLicenseCheck(c, conf); err != nil {
return nil, err
}
// We create the funcs here, then populate the given config with it so that
// the caller can share state
conf.ReloadFuncsLock = &c.reloadFuncsLock
c.reloadFuncsLock.Lock()
c.reloadFuncs = make(map[string][]reloadutil.ReloadFunc)
c.reloadFuncsLock.Unlock()
conf.ReloadFuncs = &c.reloadFuncs
// None of the setup below this point is required in
// recovery mode
if c.recoveryMode {
return c, nil
}
if conf.PluginDirectory != "" {
c.pluginDirectory, err = filepath.Abs(conf.PluginDirectory)
if err != nil {
return nil, fmt.Errorf("core setup failed, could not verify plugin directory: %w", err)
}
}
createSecondaries(c, conf)
if conf.HAPhysical != nil && conf.HAPhysical.HAEnabled() {
c.ha = conf.HAPhysical
}
c.loginMFABackend = NewLoginMFABackend(c, conf.Logger)
logicalBackends := make(map[string]logical.Factory)
for k, f := range conf.LogicalBackends {
logicalBackends[k] = f
}
_, ok := logicalBackends["kv"]
if !ok {
logicalBackends["kv"] = PassthroughBackendFactory
}
logicalBackends["cubbyhole"] = CubbyholeBackendFactory
logicalBackends[systemMountType] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
sysBackendLogger := conf.Logger.Named("system")
c.AddLogger(sysBackendLogger)
b := NewSystemBackend(c, sysBackendLogger)
if err := b.Setup(ctx, config); err != nil {
return nil, err
}
return b, nil
}
logicalBackends["identity"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
identityLogger := conf.Logger.Named("identity")
c.AddLogger(identityLogger)
return NewIdentityStore(ctx, c, config, identityLogger)
}
addExtraLogicalBackends(c, logicalBackends)
c.logicalBackends = logicalBackends
credentialBackends := make(map[string]logical.Factory)
for k, f := range conf.CredentialBackends {
credentialBackends[k] = f
}
credentialBackends["token"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
tsLogger := conf.Logger.Named("token")
c.AddLogger(tsLogger)
return NewTokenStore(ctx, tsLogger, c, config)
}
addExtraCredentialBackends(c, credentialBackends)
c.credentialBackends = credentialBackends
auditBackends := make(map[string]audit.Factory)
for k, f := range conf.AuditBackends {
auditBackends[k] = f
}
c.auditBackends = auditBackends
uiStoragePrefix := systemBarrierPrefix + "ui"
c.uiConfig = NewUIConfig(conf.EnableUI, physical.NewView(c.physical, uiStoragePrefix), NewBarrierView(c.barrier, uiStoragePrefix))
c.clusterListener.Store((*cluster.Listener)(nil))
// For listeners with custom response headers, configure customListenerHeader
if conf.RawConfig.Listeners != nil {
uiHeaders, err := c.UIHeaders()
if err != nil {
return nil, err
}
c.customListenerHeader.Store(NewListenerCustomHeader(conf.RawConfig.Listeners, c.logger, uiHeaders))
} else {
c.customListenerHeader.Store(([]*ListenerCustomHeaders)(nil))
}
logRequestsLevel := conf.RawConfig.LogRequestsLevel
c.logRequestsLevel = uberAtomic.NewInt32(0)
switch {
case log.LevelFromString(logRequestsLevel) > log.NoLevel && log.LevelFromString(logRequestsLevel) < log.Off:
c.logRequestsLevel.Store(int32(log.LevelFromString(logRequestsLevel)))
case logRequestsLevel != "":
c.logger.Warn("invalid log_requests_level", "level", conf.RawConfig.LogRequestsLevel)
}
quotasLogger := conf.Logger.Named("quotas")
c.allLoggers = append(c.allLoggers, quotasLogger)
c.quotaManager, err = quotas.NewManager(quotasLogger, c.quotaLeaseWalker, c.metricSink)
if err != nil {
return nil, err
}
err = c.adjustForSealMigration(conf.UnwrapSeal)
if err != nil {
return nil, err
}
if c.versionTimestamps == nil {
c.logger.Info("Initializing versionTimestamps for core")
c.versionTimestamps = make(map[string]time.Time)
}
return c, nil
}
// handleVersionTimeStamps stores the current version at the current time to
// storage, and then loads all versions and upgrade timestamps out from storage.
func (c *Core) handleVersionTimeStamps(ctx context.Context) error {
currentTime := time.Now().UTC()
isUpdated, err := c.storeVersionTimestamp(ctx, version.Version, currentTime, false)
if err != nil {
return fmt.Errorf("error storing vault version: %w", err)
}
if isUpdated {
c.logger.Info("Recorded vault version", "vault version", version.Version, "upgrade time", currentTime)
}
// Finally, load the versions into core fields
err = c.loadVersionTimestamps(ctx)
if err != nil {
return err
}
return nil
}
// HostnameHeaderEnabled determines whether to add the X-Vault-Hostname header
// to HTTP responses.
func (c *Core) HostnameHeaderEnabled() bool {
return c.enableResponseHeaderHostname
}
// RaftNodeIDHeaderEnabled determines whether to add the X-Vault-Raft-Node-ID header
// to HTTP responses.
func (c *Core) RaftNodeIDHeaderEnabled() bool {
return c.enableResponseHeaderRaftNodeID
}
// DisableSSCTokens determines whether to use server side consistent tokens or not.
func (c *Core) DisableSSCTokens() bool {
return c.disableSSCTokens
}
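// A hedged sketch, not in the original source, of how an HTTP layer might
// consult these flags before decorating a response; `w` and `hostname` are
// hypothetical:
//
//	if core.HostnameHeaderEnabled() {
//		w.Header().Set("X-Vault-Hostname", hostname)
//	}
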
// Shutdown is invoked when the Vault instance is about to be terminated. It
// should not be accessible as part of an API call as it will cause an availability
// problem. It is only used to gracefully quit in the case of HA so that failover
// happens as quickly as possible.
func (c *Core) Shutdown() error {
c.logger.Debug("shutdown called")
err := c.sealInternal()
c.stateLock.Lock()
defer c.stateLock.Unlock()
if c.shutdownDoneCh != nil {
close(c.shutdownDoneCh)
c.shutdownDoneCh = nil
}
return err
}
func (c *Core) ShutdownWait() error {
donech := c.ShutdownDone()
err := c.Shutdown()
if donech != nil {
<-donech
}
return err
}
// ShutdownDone returns a channel that will be closed after Shutdown completes
func (c *Core) ShutdownDone() <-chan struct{} {
return c.shutdownDoneCh
}
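// A hedged usage sketch, not part of the original source: a signal handler
// can seal the core and block until shutdown completes so that HA failover
// starts promptly. `shutdownSignalCh` is hypothetical.
//
//	go func() {
//		<-shutdownSignalCh
//		if err := core.ShutdownWait(); err != nil {
//			logger.Error("error shutting down core", "error", err)
//		}
//	}()
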
// CORSConfig returns the current CORS configuration
func (c *Core) CORSConfig() *CORSConfig {
return c.corsConfig
}
func (c *Core) GetContext() (context.Context, context.CancelFunc) {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
return context.WithCancel(namespace.RootContext(c.activeContext))
}
// Sealed checks if Vault is currently sealed
func (c *Core) Sealed() bool {
return atomic.LoadUint32(c.sealed) == 1
}
// SecretProgress returns the number of unseal keys provided so far and the
// nonce of the in-progress unseal operation
func (c *Core) SecretProgress() (int, string) {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
switch c.unlockInfo {
case nil:
return 0, ""
default:
return len(c.unlockInfo.Parts), c.unlockInfo.Nonce
}
}
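// A hedged sketch, not in the original source: after each submitted share an
// operator-facing caller could report progress like this:
//
//	progress, nonce := core.SecretProgress()
//	fmt.Printf("unseal progress: %d share(s) received for nonce %q\n", progress, nonce)
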
// ResetUnsealProcess removes the current unlock parts from memory, to reset
// the unsealing process
func (c *Core) ResetUnsealProcess() {
c.stateLock.Lock()
defer c.stateLock.Unlock()
c.unlockInfo = nil
}
func (c *Core) UnsealMigrate(key []byte) (bool, error) {
err := c.unsealFragment(key, true)
return !c.Sealed(), err
}
// Unseal is used to provide one of the key parts to unseal the Vault.
func (c *Core) Unseal(key []byte) (bool, error) {
err := c.unsealFragment(key, false)
return !c.Sealed(), err
}
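// A hedged usage sketch, not part of the original source: key shares are
// typically fed in one at a time until the threshold is met; `shares` is
// hypothetical.
//
//	for _, share := range shares {
//		unsealed, err := core.Unseal(share)
//		if err != nil {
//			return err
//		}
//		if unsealed {
//			break
//		}
//	}
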
// unsealFragment takes a key fragment and attempts to use it to unseal Vault.
// Vault may remain sealed afterwards even when no error is returned,
// depending on whether enough key fragments were provided to meet the
// target threshold.
//
// The provided key should be a recovery key fragment if the seal
// is an autoseal, or a regular seal key fragment for shamir. In
// migration scenarios "seal" in the preceding sentence refers to
// the migration seal in c.migrationInfo.seal.
//
// We use getUnsealKey to work out if we have enough fragments,
// and if we don't have enough we return early. Otherwise we get
// back the combined key.
//
// For legacy shamir the combined key *is* the master key. For
// new-style shamir the combined key is used to decrypt the master key
// read from storage. For autoseal the combined key isn't used
// except to verify that the stored recovery key matches.
//
// In migration scenarios a side-effect of unsealing is that
// the members of c.migrationInfo are populated (excluding
// .seal, which must already be populated before unseal is called.)
func (c *Core) unsealFragment(key []byte, migrate bool) error {
defer metrics.MeasureSince([]string{"core", "unseal"}, time.Now())
c.stateLock.Lock()
defer c.stateLock.Unlock()
ctx := context.Background()
if migrate && c.migrationInfo == nil {
return fmt.Errorf("can't perform a seal migration, no migration seal found")
}
if migrate && c.isRaftUnseal() {
return fmt.Errorf("can't perform a seal migration while joining a raft cluster")
}
if !migrate && c.migrationInfo != nil {
done, err := c.sealMigrated(ctx)
if err != nil {
return fmt.Errorf("error checking to see if seal is migrated: %w", err)
}
if !done {
return fmt.Errorf("migrate option not provided and seal migration is pending")
}
}
c.logger.Debug("unseal key supplied", "migrate", migrate)
// Explicitly check for init status. This also checks if the seal
// configuration is valid (i.e. non-nil).
init, err := c.Initialized(ctx)
if err != nil {
return err
}
if !init && !c.isRaftUnseal() {
return ErrNotInit
}
// Verify the key length
min, max := c.barrier.KeyLength()
max += shamir.ShareOverhead
if len(key) < min {
return &ErrInvalidKey{fmt.Sprintf("key is shorter than minimum %d bytes", min)}
}
if len(key) > max {
return &ErrInvalidKey{fmt.Sprintf("key is longer than maximum %d bytes", max)}
}
// Check if already unsealed
if !c.Sealed() {
return nil
}
sealToUse := c.seal
if migrate {
c.logger.Info("unsealing using migration seal")
sealToUse = c.migrationInfo.seal
}
newKey, err := c.recordUnsealPart(key)
if !newKey || err != nil {
return err
}
// getUnsealKey returns either a recovery key (in the case of an autoseal),
// a master key (legacy shamir), or an unseal key (new-style shamir).
combinedKey, err := c.getUnsealKey(ctx, sealToUse)
if err != nil || combinedKey == nil {
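// Note: a nil combinedKey with a nil err means the key share threshold has
// not been met yet; returning nil here tells the caller to keep accepting
// key shares.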
return err
}
if migrate {
c.migrationInfo.unsealKey = combinedKey
}
if c.isRaftUnseal() {
return c.unsealWithRaft(combinedKey)
}
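// Convert the combined key into the barrier master key. Depending on the
// seal, the combined key either is the master key already (legacy shamir)
// or is used to obtain the stored master key (autoseal, new-style shamir).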
masterKey, err := c.unsealKeyToMasterKeyPreUnseal(ctx, sealToUse, combinedKey)
if err != nil {
return err
}
return c.unsealInternal(ctx, masterKey)
}
func (c *Core) unsealWithRaft(combinedKey []byte) error {
ctx := context.Background()
if c.seal.BarrierType() == wrapping.Shamir {
// For a new-style shamir seal the combined key is needed to decrypt the
// stored master key; for a legacy shamir seal this serves no purpose, but
// it doesn't hurt.
err := c.seal.GetAccess().Wrapper.(*aeadwrapper.ShamirWrapper).SetAESGCMKeyBytes(combinedKey)
if err != nil {
return err
}
}
switch c.raftInfo.joinInProgress {
case true:
// JoinRaftCluster is already trying to perform a join based on the retry_join
// configuration. Inform that routine that unseal key validation is complete so
// that it can continue to try to join possible leader nodes, and wait for it
// to complete.
atomic.StoreUint32(c.postUnsealStarted, 1)
c.logger.Info("waiting for raft retry join process to complete")
<-c.raftJoinDoneCh
default:
// This is the case for manual raft join. Send the answer to the leader node and
// wait for data to start streaming in.
if err := c.joinRaftSendAnswer(ctx, c.seal.GetAccess(), c.raftInfo); err != nil {
return err
}
// Reset the state
c.raftInfo = nil
}
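// The join itself is complete, but this node cannot finish unsealing until
// raft has streamed the cluster's data (including the barrier keyring)
// from the leader, so complete the unseal asynchronously.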
go func() {
var masterKey []byte
keyringFound := false
// Wait until we at least have the keyring before we attempt to
// unseal the node.
for {
if !keyringFound {
keys, err := c.underlyingPhysical.List(ctx, keyringPrefix)
if err != nil {
c.logger.Error("failed to list physical keys", "error", err)
return
}
if strutil.StrListContains(keys, "keyring") {
keyringFound = true
}
}
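// Once the keyring has been replicated, derive the master key from the
// combined key (this only needs to happen once).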
if keyringFound && len(masterKey) == 0 {
var err error
masterKey, err = c.unsealKeyToMasterKeyPreUnseal(ctx, c.seal, combinedKey)
if err != nil {
c.logger.Error("failed to read master key", "error", err)
return
}
}
if keyringFound && len(masterKey) > 0 {
err := c.unsealInternal(ctx, masterKey)
if err != nil {
c.logger.Error("failed to unseal", "error", err)
}
return
}
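// Keyring not replicated yet; poll again in a second.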
time.Sleep(1 * time.Second)
}
}()
return nil
}
// recordUnsealPart takes in a key fragment, and returns true if it's a new fragment.
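// A typical caller feeds shares one at a time and then attempts to combine
// them, roughly (a sketch, not an exact call site):
//
//	if isNew, err := c.recordUnsealPart(share); err == nil && isNew {
//		combined, err := c.getUnsealKey(ctx, seal) // nil, nil until the threshold is met
//		...
//	}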
func (c *Core) recordUnsealPart(key []byte) (bool, error) {
// Check if we already have this piece
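// (the comparison is constant-time so duplicate detection does not leak
// key material through timing)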
if c.unlockInfo != nil {
for _, existing := range c.unlockInfo.Parts {
if subtle.ConstantTimeCompare(existing, key) == 1 {
return false, nil
}
}
} else {
uuid, err := uuid.GenerateUUID()
if err != nil {
return false, err
}
c.unlockInfo = &unlockInformation{
Nonce: uuid,
}
}
// Store this key
c.unlockInfo.Parts = append(c.unlockInfo.Parts, key)
return true, nil
}
// getUnsealKey uses the key fragments recorded by recordUnsealPart and
// returns the combined key once the key share threshold is met.
// If the key fragments are part of a recovery key, it also verifies that
// the combined key matches the stored recovery key on disk.
func (c *Core) getUnsealKey(ctx context.Context, seal Seal) ([]byte, error) {
var config *SealConfig
var err error
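// Choose where the seal configuration comes from: the recovery config for
// an autoseal, the leader's barrier config when joining a raft cluster,
// and the locally stored barrier config otherwise.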
switch {
case seal.RecoveryKeySupported():
config, err = seal.RecoveryConfig(ctx)
case c.isRaftUnseal():
// Ignore the follower's seal config and use the leader's barrier
// configuration instead.
config = c.raftInfo.leaderBarrierConfig
default:
config, err = seal.BarrierConfig(ctx)
}
if err != nil {
return nil, err
}
if config == nil {
return nil, fmt.Errorf("failed to obtain seal/recovery configuration")
}
// Check whether we have enough key shares to unlock; proceed through the
// rest of the call only once the threshold has been met.
if len(c.unlockInfo.Parts) < config.SecretThreshold {
if c.logger.IsDebug() {
c.logger.Debug("cannot unseal, not enough keys", "keys", len(c.unlockInfo.Parts), "threshold", config.SecretThreshold, "nonce", c.unlockInfo.Nonce)
}
return nil, nil
}
defer func() {
c.unlockInfo = nil
}()
// Recover the split key. unsealKey is the shamir-combined
// key, or the single provided key if the threshold is 1.
var unsealKey []byte
if config.SecretThreshold == 1 {
unsealKey = make([]byte, len(c.unlockInfo.Parts[0]))
copy(unsealKey, c.unlockInfo.Parts[0])
} else {
unsealKey, err = shamir.Combine(c.unlockInfo.Parts)
if err != nil {
return nil, fmt.Errorf("failed to compute combined key: %w", err)
}
}
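// For an autoseal the combined value is the recovery key; verify it
// against the seal's stored recovery key before treating it as valid.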
if seal.RecoveryKeySupported() {
if err := seal.VerifyRecoveryKey(ctx, unsealKey); err != nil {
return nil, err
}
}
return unsealKey, nil
}
// sealMigrated must be called with the stateLock held. It returns true if
// the seal configured in HCL and the seal configured in storage match.
// For the auto->auto same-seal migration scenario it returns false if,
// even though the preceding conditions hold, the configured seal cannot
// decrypt the master key in storage.
func (c *Core) sealMigrated(ctx context.Context) (bool, error) {
if atomic.LoadUint32(c.sealMigrationDone) == 1 {
return true, nil
}
existBarrierSealConfig, existRecoverySealConfig, err := c.PhysicalSealConfigs(ctx)
if err != nil {
return false, err
}
if existBarrierSealConfig.Type != c.seal.BarrierType() {
return false, nil
}
if c.seal.RecoveryKeySupported() && existRecoverySealConfig.Type != c.seal.RecoveryType() {
return false, nil
}
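// The stored seal config matches the configured seal. If the configured
// seal and the migration source seal differ in type, the migration must
// already have completed.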
if c.seal.BarrierType() != c.migrationInfo.seal.BarrierType() {
return true, nil
}
// The above checks handle the auto->shamir, shamir->auto, and
// auto1->auto2 cases. For auto1->auto1 we need to actually try to read
// and decrypt the keys.
keysMig, errMig := c.migrationInfo.seal.GetStoredKeys(ctx)
keys, err := c.seal.GetStoredKeys(ctx)
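// Whichever seal can successfully decrypt the stored keys is the one the
// barrier currently belongs to; if neither can, surface the error.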
switch {
case len(keys) > 0 && err == nil:
return true, nil
case len(keysMig) > 0 && errMig == nil:
return false, nil
case errors.Is(err, &ErrDecrypt{}) && errors.Is(errMig, &ErrDecrypt{}):
return false, fmt.Errorf("decrypt error, neither the old nor new seal can read stored keys: old seal err=%v, new seal err=%v", errMig, err)
default:
return false, fmt.Errorf("neither the old nor new seal can read stored keys: old seal err=%v, new seal err=%v", errMig, err)
}
}
// migrateSeal must be called with the stateLock held.
func (c *Core) migrateSeal(ctx context.Context) error {
if c.migrationInfo == nil {
return nil
}
ok, err := c.sealMigrated(ctx)
if err != nil {
return fmt.Errorf("error checking if seal is migrated or not: %w", err)
}
if ok {
c.logger.Info("migration is already performed")
return nil
}
c.logger.Info("seal migration initiated")
switch {
case c.migrationInfo.seal.RecoveryKeySupported() && c.seal.RecoveryKeySupported():
c.logger.Info("migrating from one auto-unseal to another", "from",
c.migrationInfo.seal.BarrierType(), "to", c.seal.BarrierType())
		// Copy the recovery and barrier keys across so the new seal holds the
		// same key material as the old one.
recoveryKey, err := c.migrationInfo.seal.RecoveryKey(ctx)
if err != nil {
return fmt.Errorf("error getting recovery key to set on new seal: %w", err)
}
if err := c.seal.SetRecoveryKey(ctx, recoveryKey); err != nil {
return fmt.Errorf("error setting new recovery key information during migrate: %w", err)
}
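		// Carry the stored barrier keys over to the new seal as well.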
barrierKeys, err := c.migrationInfo.seal.GetStoredKeys(ctx)
if err != nil {
return fmt.Errorf("error getting stored keys to set on new seal: %w", err)
}
if err := c.seal.SetStoredKeys(ctx, barrierKeys); err != nil {
return fmt.Errorf("error setting new barrier key information during migrate: %w", err)
}
case c.migrationInfo.seal.RecoveryKeySupported():
c.logger.Info("migrating from one auto-unseal to shamir", "from", c.migrationInfo.seal.BarrierType())
// Auto to Shamir, since recovery key isn't supported on new seal
recoveryKey, err := c.migrationInfo.seal.RecoveryKey(ctx)
if err != nil {
return fmt.Errorf("error getting recovery key to set on new seal: %w", err)
}
// We have recovery keys; we're going to use them as the new shamir KeK.
err = c.seal.GetAccess().Wrapper.(*aeadwrapper.ShamirWrapper).SetAESGCMKeyBytes(recoveryKey)
if err != nil {
return fmt.Errorf("failed to set master key in seal: %w", err)
}
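		// With the shamir wrapper keyed above, storing the barrier keys
		// encrypts them under the recovery key, which now acts as the KeK.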
barrierKeys, err := c.migrationInfo.seal.GetStoredKeys(ctx)
if err != nil {
return fmt.Errorf("error getting stored keys to set on new seal: %w", err)
}
if err := c.seal.SetStoredKeys(ctx, barrierKeys); err != nil {
return fmt.Errorf("error setting new barrier key information during migrate: %w", err)
}
case c.seal.RecoveryKeySupported():
c.logger.Info("migrating from shamir to auto-unseal", "to", c.seal.BarrierType())
		// Migrating from shamir -> auto. Reuse the combined shamir key, which
		// previously protected the master key, as the new recovery key.
if err := c.seal.SetRecoveryKey(ctx, c.migrationInfo.unsealKey); err != nil {
return fmt.Errorf("error setting new recovery key information: %w", err)
}
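		// The old shamir key now lives on as the recovery key, so generate a
		// fresh master key for the barrier rather than reusing it.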
// Generate a new master key
newMasterKey, err := c.barrier.GenerateKey(c.secureRandomReader)
if err != nil {
return fmt.Errorf("error generating new master key: %w", err)
}
// Rekey the barrier. This handles the case where the shamir seal we're
// migrating from was a legacy seal without a stored master key.
if err := c.barrier.Rekey(ctx, newMasterKey); err != nil {
return fmt.Errorf("error rekeying barrier during migration: %w", err)
}
// Store the new master key
if err := c.seal.SetStoredKeys(ctx, [][]byte{newMasterKey}); err != nil {
return fmt.Errorf("error storing new master key: %w", err)
}
default:
return errors.New("unhandled migration case (shamir to shamir)")
}
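	// Persist barrier/recovery seal configurations reflecting the new seal so
	// that future unseals use it.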
err = c.migrateSealConfig(ctx)
if err != nil {
return fmt.Errorf("error storing new seal configs: %w", err)
}
	// Flag that the migration was performed so the seal rewrap can run later.
atomic.StoreUint32(c.sealMigrationDone, 1)
c.logger.Info("seal migration complete")
return nil
}
// unsealInternal takes in the master key and attempts to unseal the barrier.
// N.B.: This must be called with the state write lock held.
func (c *Core) unsealInternal(ctx context.Context, masterKey []byte) error {
// Attempt to unlock
if err := c.barrier.Unseal(ctx, masterKey); err != nil {
return err
}
if err := preUnsealInternal(ctx, c); err != nil {
return err
}
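	// Start the cluster listener so this node can accept cluster traffic
	// (e.g. request forwarding) now that the barrier is unsealed.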
if err := c.startClusterListener(ctx); err != nil {
return err
}
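	// If a Raft backend is in use (for storage or for HA), bring it up too.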
if err := c.startRaftBackend(ctx); err != nil {
return err
}
if err := c.setupReplicationResolverHandler(); err != nil {
c.logger.Warn("failed to start replication resolver server", "error", err)
}
// Do post-unseal setup if HA is not enabled
if c.ha == nil {
// We still need to set up cluster info even if it's not part of a
// cluster right now. This also populates the cached cluster object.
if err := c.setupCluster(ctx); err != nil {
c.logger.Error("cluster setup failed", "error", err)
c.barrier.Seal()
c.logger.Warn("vault is sealed")
return err
}
if err := c.migrateSeal(ctx); err != nil {
c.logger.Error("seal migration error", "error", err)
c.barrier.Seal()
c.logger.Warn("vault is sealed")
return err
}
ctx, ctxCancel := context.WithCancel(namespace.RootContext(nil))
if err := c.postUnseal(ctx, ctxCancel, standardUnsealStrategy{}); err != nil {
c.logger.Error("post-unseal setup failed", "error", err)
c.barrier.Seal()
c.logger.Warn("vault is sealed")
return err
}
// Force a cache bust here, which will also run migration code
if c.seal.RecoveryKeySupported() {
c.seal.SetRecoveryConfig(ctx, nil)
}
c.standby = false
} else {
// Go to standby mode, wait until we are active to unseal
c.standbyDoneCh = make(chan struct{})
c.manualStepDownCh = make(chan struct{}, 1)
c.standbyStopCh.Store(make(chan struct{}))
go c.runStandby(c.standbyDoneCh, c.manualStepDownCh, c.standbyStopCh.Load().(chan struct{}))
}
// Success!
atomic.StoreUint32(c.sealed, 0)
c.metricSink.SetGaugeWithLabels([]string{"core", "unsealed"}, 1, nil)
if c.logger.IsInfo() {
c.logger.Info("vault is unsealed")
}
if c.serviceRegistration != nil {
if err := c.serviceRegistration.NotifySealedStateChange(false); err != nil {
if c.logger.IsWarn() {
c.logger.Warn("failed to notify unsealed status", "error", err)
}
}
if err := c.serviceRegistration.NotifyInitializedStateChange(true); err != nil {
if c.logger.IsWarn() {
c.logger.Warn("failed to notify initialized status", "error", err)
}
}
}
return nil
}
// SealWithRequest takes in a logical.Request, acquires the lock, and passes
// through to sealInternal
func (c *Core) SealWithRequest(httpCtx context.Context, req *logical.Request) error {
defer metrics.MeasureSince([]string{"core", "seal-with-request"}, time.Now())
if c.Sealed() {
return nil
}
c.stateLock.RLock()
// This will unlock the read lock
// We use background context since we may not be active
ctx, cancel := context.WithCancel(namespace.RootContext(nil))
defer cancel()
go func() {
select {
case <-ctx.Done():
case <-httpCtx.Done():
cancel()
}
}()
// This will unlock the read lock
return c.sealInitCommon(ctx, req)
}
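// The goroutine in SealWithRequest above ties an HTTP request's lifetime to
// an internal background context. As a hedged, standalone sketch of that
// pattern (linkContexts is an illustrative name, not part of Vault's API):
// derive a cancelable context from a long-lived parent and propagate
// cancellation from the request-scoped context without inheriting its
// deadline or values.
func linkContexts(parent, reqCtx context.Context) (context.Context, context.CancelFunc) {
	ctx, cancel := context.WithCancel(parent)
	go func() {
		select {
		case <-ctx.Done():
			// Normal completion: the derived context was canceled first.
		case <-reqCtx.Done():
			// The request went away; cancel the derived context too.
			cancel()
		}
	}()
	return ctx, cancel
}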
// Seal takes in a token and creates a logical.Request, acquires the lock, and
// passes through to sealInternal
func (c *Core) Seal(token string) error {
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())
if c.Sealed() {
return nil
}
c.stateLock.RLock()
req := &logical.Request{
Operation: logical.UpdateOperation,
Path: "sys/seal",
ClientToken: token,
}
// This will unlock the read lock
// We use background context since we may not be active
return c.sealInitCommon(namespace.RootContext(nil), req)
}
// sealInitCommon is common logic for Seal and SealWithRequest and is used to
// re-seal the Vault. This requires the Vault to be unsealed again to perform
// any further operations. Note: this function will read-unlock the state lock.
func (c *Core) sealInitCommon(ctx context.Context, req *logical.Request) (retErr error) {
defer metrics.MeasureSince([]string{"core", "seal-internal"}, time.Now())
var unlocked bool
defer func() {
if !unlocked {
c.stateLock.RUnlock()
}
}()
if req == nil {
return errors.New("nil request to seal")
}
// Since standby nodes have no token store, sealing cannot be performed
// there. Ideally the request would be forwarded to the leader node for
// validation and execution; for now we just return an error and recommend
// a vault restart, which effectively accomplishes the same thing.
if c.standby {
c.logger.Error("vault cannot seal when in standby mode; please restart instead")
return errors.New("vault cannot seal when in standby mode; please restart instead")
}
err := c.PopulateTokenEntry(ctx, req)
if err != nil {
if errwrap.Contains(err, logical.ErrPermissionDenied.Error()) {
return logical.ErrPermissionDenied
}
return logical.ErrInvalidRequest
}
acl, te, entity, identityPolicies, err := c.fetchACLTokenEntryAndEntity(ctx, req)
if err != nil {
return err
}
// Audit-log the request before going any further
auth := &logical.Auth{
ClientToken: req.ClientToken,
Accessor: req.ClientTokenAccessor,
}
if te != nil {
auth.IdentityPolicies = identityPolicies[te.NamespaceID]
delete(identityPolicies, te.NamespaceID)
auth.ExternalNamespacePolicies = identityPolicies
auth.TokenPolicies = te.Policies
auth.Policies = append(te.Policies, identityPolicies[te.NamespaceID]...)
auth.Metadata = te.Meta
auth.DisplayName = te.DisplayName
auth.EntityID = te.EntityID
auth.TokenType = te.Type
}
logInput := &logical.LogInput{
Auth: auth,
Request: req,
}
if err := c.auditBroker.LogRequest(ctx, logInput, c.auditedHeaders); err != nil {
c.logger.Error("failed to audit request", "request_path", req.Path, "error", err)
return errors.New("failed to audit request, cannot continue")
}
if entity != nil && entity.Disabled {
c.logger.Warn("permission denied as the entity on the token is disabled")
return logical.ErrPermissionDenied
}
if te != nil && te.EntityID != "" && entity == nil {
c.logger.Warn("permission denied as the entity on the token is invalid")
return logical.ErrPermissionDenied
}
// Attempt to use the token (decrement num_uses)
// On error bail out; if the token has been revoked, bail out too
if te != nil {
te, err = c.tokenStore.UseToken(ctx, te)
if err != nil {
c.logger.Error("failed to use token", "error", err)
return ErrInternalError
}
if te == nil {
// Token is no longer valid
return logical.ErrPermissionDenied
}
}
// Verify that this operation is allowed
authResults := c.performPolicyChecks(ctx, acl, te, req, entity, &PolicyCheckOpts{
RootPrivsRequired: true,
})
if !authResults.Allowed {
retErr = multierror.Append(retErr, authResults.Error)
if authResults.Error.ErrorOrNil() == nil || authResults.DeniedError {
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
}
return retErr
}
if te != nil && te.NumUses == tokenRevocationPending {
// Token needs to be revoked. We do this immediately here because
// we won't have a token store after sealing.
leaseID, err := c.expiration.CreateOrFetchRevocationLeaseByToken(c.activeContext, te)
if err == nil {
err = c.expiration.Revoke(c.activeContext, leaseID)
}
if err != nil {
c.logger.Error("token needed revocation before seal but failed to revoke", "error", err)
retErr = multierror.Append(retErr, ErrInternalError)
}
}
// Unlock; sealing will grab the lock when needed
unlocked = true
c.stateLock.RUnlock()
sealErr := c.sealInternal()
if sealErr != nil {
retErr = multierror.Append(retErr, sealErr)
}
return
}
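// A hedged usage sketch of the two public seal entry points above; "core",
// "token", "r", and "req" are illustrative placeholders, not names from this
// file:
//
//	// Token-based seal, e.g. from a CLI-style caller:
//	if err := core.Seal(token); err != nil {
//		// handle permission or internal errors
//	}
//
//	// Request-based seal from an HTTP handler, tying cancellation to the
//	// request context:
//	if err := core.SealWithRequest(r.Context(), req); err != nil {
//		// handle error
//	}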
// UIEnabled reports whether the UI is enabled
func (c *Core) UIEnabled() bool {
return c.uiConfig.Enabled()
}
// UIHeaders returns configured UI headers
func (c *Core) UIHeaders() (http.Header, error) {
return c.uiConfig.Headers(context.Background())
}
// sealInternal is an internal method used to seal the vault. It does not do
// any authorization checking.
func (c *Core) sealInternal() error {
return c.sealInternalWithOptions(true, false, true)
}
func (c *Core) sealInternalWithOptions(grabStateLock, keepHALock, performCleanup bool) error {
// Mark sealed, and if already marked return
if swapped := atomic.CompareAndSwapUint32(c.sealed, 0, 1); !swapped {
return nil
}
c.metricSink.SetGaugeWithLabels([]string{"core", "unsealed"}, 0, nil)
c.logger.Info("marked as sealed")
// Clear forwarding clients
c.requestForwardingConnectionLock.Lock()
c.clearForwardingClients()
c.requestForwardingConnectionLock.Unlock()
activeCtxCancel := c.activeContextCancelFunc.Load().(context.CancelFunc)
cancelCtxAndLock := func() {
doneCh := make(chan struct{})
go func() {
select {
case <-doneCh:
// Attempt to drain any inflight requests
case <-time.After(DefaultMaxRequestDuration):
if activeCtxCancel != nil {
activeCtxCancel()
}
}
}()
c.stateLock.Lock()
close(doneCh)
// Stop requests from processing
if activeCtxCancel != nil {
activeCtxCancel()
}
}
// Do pre-seal teardown if HA is not enabled
if c.ha == nil {
if grabStateLock {
cancelCtxAndLock()
defer c.stateLock.Unlock()
}
// Even in a non-HA context we key off of this for some things
c.standby = true
// Stop requests from processing
if activeCtxCancel != nil {
activeCtxCancel()
}
if err := c.preSeal(); err != nil {
c.logger.Error("pre-seal teardown failed", "error", err)
return fmt.Errorf("internal error")
}
} else {
// If we are keeping the lock we already have the state write lock
// held. Otherwise grab it here so that when stopCh is triggered we are
// locked.
if keepHALock {
atomic.StoreUint32(c.keepHALockOnStepDown, 1)
}
if grabStateLock {
cancelCtxAndLock()
defer c.stateLock.Unlock()
}
// If we are trying to acquire the lock, force it to return with nil so
// runStandby will exit
// If we are active, signal the standby goroutine to shut down and wait
// for completion. We have the state lock here so nothing else should
// be toggling standby status.
close(c.standbyStopCh.Load().(chan struct{}))
c.logger.Debug("finished triggering standbyStopCh for runStandby")
// Wait for runStandby to stop
<-c.standbyDoneCh
atomic.StoreUint32(c.keepHALockOnStepDown, 0)
c.logger.Debug("runStandby done")
}
c.teardownReplicationResolverHandler()
// Perform additional cleanup upon sealing.
if performCleanup {
if raftBackend := c.getRaftBackend(); raftBackend != nil {
if err := raftBackend.TeardownCluster(c.getClusterListener()); err != nil {
c.logger.Error("error stopping storage cluster", "error", err)
return err
}
}
// Stop the cluster listener
c.stopClusterListener()
}
c.logger.Debug("sealing barrier")
if err := c.barrier.Seal(); err != nil {
c.logger.Error("error sealing barrier", "error", err)
return err
}
if c.serviceRegistration != nil {
if err := c.serviceRegistration.NotifySealedStateChange(true); err != nil {
if c.logger.IsWarn() {
c.logger.Warn("failed to notify sealed status", "error", err)
}
}
}
if c.quotaManager != nil {
if err := c.quotaManager.Reset(); err != nil {
c.logger.Error("error resetting quota manager", "error", err)
}
}
postSealInternal(c)
c.logger.Info("vault is sealed")
return nil
}
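// cancelCtxAndLock above implements a timed drain: give in-flight requests
// up to DefaultMaxRequestDuration to finish (by waiting on the state lock),
// then cancel them so the lock can be acquired. A hedged standalone sketch of
// the same pattern (drainThenCancel is an illustrative name, not Vault API):
func drainThenCancel(acquire func(), cancel context.CancelFunc, timeout time.Duration) {
	doneCh := make(chan struct{})
	go func() {
		select {
		case <-doneCh:
			// Lock acquired before the deadline; requests drained naturally.
		case <-time.After(timeout):
			// Deadline hit; cancel outstanding requests to unblock the lock.
			if cancel != nil {
				cancel()
			}
		}
	}()
	acquire()
	close(doneCh)
	// Stop any remaining request processing now that the lock is held.
	if cancel != nil {
		cancel()
	}
}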
type UnsealStrategy interface {
unseal(context.Context, log.Logger, *Core) error
}
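// standardUnsealStrategy below is the implementation used in this file; the
// interface leaves room for other implementations. A hedged, purely
// illustrative sketch of a minimal alternate strategy:
//
//	type noopUnsealStrategy struct{}
//
//	func (noopUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c *Core) error {
//		logger.Info("no-op unseal strategy invoked")
//		return nil
//	}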
type standardUnsealStrategy struct{}
func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c *Core) error {
// Clear forwarding clients; we're active
c.requestForwardingConnectionLock.Lock()
c.clearForwardingClients()
c.requestForwardingConnectionLock.Unlock()
// Mark the active time. We do this first so it can be correlated to the logs
// for the active startup.
c.activeTime = time.Now().UTC()
if err := postUnsealPhysical(c); err != nil {
return err
}
if err := enterprisePostUnseal(c, false); err != nil {
return err
}
if !c.ReplicationState().HasState(consts.ReplicationPerformanceSecondary | consts.ReplicationDRSecondary) {
// Only perf primaries should write feature flags, but we do it by
// excluding other states so that we don't have to change it when
// a non-replicated cluster becomes a primary.
if err := c.persistFeatureFlags(ctx); err != nil {
return err
}
}
if c.autoRotateCancel == nil {
var autoRotateCtx context.Context
autoRotateCtx, c.autoRotateCancel = context.WithCancel(c.activeContext)
go c.autoRotateBarrierLoop(autoRotateCtx)
}
if !c.IsDRSecondary() {
if err := c.ensureWrappingKey(ctx); err != nil {
return err
}
}
if err := c.handleVersionTimeStamps(ctx); err != nil {
return err
}
if err := c.setupPluginCatalog(ctx); err != nil {
return err
}
if err := c.loadMounts(ctx); err != nil {
return err
}
if err := enterpriseSetupFilteredPaths(c); err != nil {
return err
}
if err := c.setupMounts(ctx); err != nil {
return err
}
if err := enterpriseSetupAPILock(c, ctx); err != nil {
return err
}
if err := c.setupPolicyStore(ctx); err != nil {
return err
}
if err := c.setupManagedKeyRegistry(); err != nil {
return err
}
if err := c.loadCORSConfig(ctx); err != nil {
return err
}
if err := c.loadCredentials(ctx); err != nil {
return err
}
if err := enterpriseSetupFilteredPaths(c); err != nil {
return err
}
if err := c.setupCredentials(ctx); err != nil {
return err
}
if err := c.setupQuotas(ctx, false); err != nil {
return err
}
c.setupCachedMFAResponseAuth()
if err := c.setupHeaderHMACKey(ctx, false); err != nil {
return err
}
if !c.IsDRSecondary() {
if err := c.startRollback(); err != nil {
return err
}
if err := c.setupExpiration(expireLeaseStrategyFairsharing); err != nil {
return err
}
if err := c.loadAudits(ctx); err != nil {
return err
}
if err := c.setupAudits(ctx); err != nil {
return err
}
if err := c.loadIdentityStoreArtifacts(ctx); err != nil {
return err
}
if err := loadMFAConfigs(ctx, c); err != nil {
return err
}
if err := c.setupAuditedHeadersConfig(ctx); err != nil {
return err
}
// not waiting on wg to avoid changing existing behavior
var wg sync.WaitGroup
if err := c.setupActivityLog(ctx, &wg); err != nil {
return err
}
} else {
c.auditBroker = NewAuditBroker(c.logger)
}
if !c.ReplicationState().HasState(consts.ReplicationPerformanceSecondary | consts.ReplicationDRSecondary) {
// Cannot do this above, as we need other resources like mounts to be setup
if err := c.setupPluginReload(); err != nil {
return err
}
}
if c.getClusterListener() != nil && (c.ha != nil || shouldStartClusterListener(c)) {
if err := c.setupRaftActiveNode(ctx); err != nil {
return err
}
if err := c.startForwarding(ctx); err != nil {
return err
}
}
c.clusterParamsLock.Lock()
defer c.clusterParamsLock.Unlock()
if err := startReplication(c); err != nil {
return err
}
return nil
}
// postUnseal is invoked on the active node, and on performance standby
// nodes, after the barrier is unsealed but before any user operations are
// allowed. This lets us set up any state that requires Vault to be
// unsealed, such as mount tables, logical backends, credential stores, etc.
func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc, unsealer UnsealStrategy) (retErr error) {
defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now())
// Clear any outstanding post-unseal funcs
c.postUnsealFuncs = nil
// Create a new request context
c.activeContext = ctx
c.activeContextCancelFunc.Store(ctxCancelFunc)
defer func() {
if retErr != nil {
ctxCancelFunc()
c.preSeal()
}
}()
c.logger.Info("post-unseal setup starting")
// Enable the cache
c.physicalCache.Purge(ctx)
if !c.cachingDisabled {
c.physicalCache.SetEnabled(true)
}
// Purge these for safety in case of a rekey
c.seal.SetBarrierConfig(ctx, nil)
if c.seal.RecoveryKeySupported() {
c.seal.SetRecoveryConfig(ctx, nil)
}
if err := unsealer.unseal(ctx, c.logger, c); err != nil {
return err
}
// Automatically re-encrypt the keys used for auto unsealing when the
// seal's encryption key changes. The regular rotation of cryptographic
// keys is a NIST recommendation. Access to prior keys for decryption
// is normally supported for a configurable time period. Re-encrypting
// the keys used for auto unsealing ensures Vault and its data will
// continue to be accessible even after prior seal keys are destroyed.
if seal, ok := c.seal.(*autoSeal); ok {
if err := seal.UpgradeKeys(c.activeContext); err != nil {
c.logger.Warn("post-unseal upgrade seal keys failed", "error", err)
}
// Start a periodic but infrequent heartbeat to detect auto-seal backend outages at runtime rather than being
// surprised by this at the next need to unseal.
seal.StartHealthCheck()
}
c.metricsCh = make(chan struct{})
go c.emitMetrics(c.metricsCh)
// This is intentionally the last block in this function. We want to allow
// writes just before allowing client requests, to ensure everything has
// been set up properly before any writes can have happened.
for _, v := range c.postUnsealFuncs {
v()
}
if atomic.LoadUint32(c.sealMigrationDone) == 1 {
if err := c.postSealMigration(ctx); err != nil {
c.logger.Warn("post-unseal post seal migration failed", "error", err)
}
}
if os.Getenv(EnvVaultDisableLocalAuthMountEntities) != "" {
c.logger.Warn("disabling entities for local auth mounts through env var", "env", EnvVaultDisableLocalAuthMountEntities)
}
c.loginMFABackend.usedCodes = cache.New(0, 30*time.Second)
c.logger.Info("post-unseal setup complete")
return nil
}
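// registerPostUnsealFunc is a hedged, illustrative helper (not part of the
// real API) showing how deferred work rides postUnsealFuncs: subsystems
// append callbacks during setup, and postUnseal runs each one last, after
// every other subsystem is ready but before client requests are served.
func registerPostUnsealFunc(c *Core, f func()) {
	c.postUnsealFuncs = append(c.postUnsealFuncs, f)
}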
// preSeal is invoked before the barrier is sealed, allowing
// for any state teardown required.
func (c *Core) preSeal() error {
defer metrics.MeasureSince([]string{"core", "pre_seal"}, time.Now())
c.logger.Info("pre-seal teardown starting")
if seal, ok := c.seal.(*autoSeal); ok {
seal.StopHealthCheck()
}
// Clear any pending funcs
c.postUnsealFuncs = nil
c.activeTime = time.Time{}
// Clear any rekey progress
c.barrierRekeyConfig = nil
c.recoveryRekeyConfig = nil
if c.metricsCh != nil {
close(c.metricsCh)
c.metricsCh = nil
}
var result error
c.stopForwarding()
c.stopRaftActiveNode()
c.clusterParamsLock.Lock()
if err := stopReplication(c); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping replication: %w", err))
}
c.clusterParamsLock.Unlock()
if err := c.teardownAudits(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down audits: %w", err))
}
if err := c.stopExpiration(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
}
c.stopActivityLog()
// Clear any cached auth response
c.mfaResponseAuthQueueLock.Lock()
c.mfaResponseAuthQueue = nil
c.mfaResponseAuthQueueLock.Unlock()
if err := c.teardownCredentials(context.Background()); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err))
}
if err := c.teardownPolicyStore(); err != nil {
result = multierror.Append(result, fmt.Errorf("error tearing down policy store: %w", err))
}
if err := c.stopRollback(); err != nil {
result = multierror.Append(result, fmt.Errorf("error stopping rollback: %w", err))
}
if err := c.unloadMounts(context.Background()); err != nil {
result = multierror.Append(result, fmt.Errorf("error unloading mounts: %w", err))
}
if err := enterprisePreSeal(c); err != nil {
result = multierror.Append(result, err)
}
if c.autoRotateCancel != nil {
c.autoRotateCancel()
c.autoRotateCancel = nil
}
c.loginMFABackend.usedCodes = nil
preSealPhysical(c)
c.logger.Info("pre-seal teardown complete")
return result
}
func enterprisePostUnsealImpl(c *Core, isStandby bool) error {
return nil
}
func enterprisePreSealImpl(c *Core) error {
return nil
}
func enterpriseSetupFilteredPathsImpl(c *Core) error {
return nil
}
func enterpriseSetupQuotasImpl(ctx context.Context, c *Core) error {
return nil
}
func startReplicationImpl(c *Core) error {
return nil
}
func stopReplicationImpl(c *Core) error {
return nil
}
func setupAPILockImpl(_ *Core, _ context.Context) error { return nil }
func (c *Core) ReplicationState() consts.ReplicationState {
return consts.ReplicationState(atomic.LoadUint32(c.replicationState))
}
func (c *Core) ActiveNodeReplicationState() consts.ReplicationState {
return consts.ReplicationState(atomic.LoadUint32(c.activeNodeReplicationState))
}
func (c *Core) SealAccess() *SealAccess {
return NewSealAccess(c.seal)
}
// StorageType returns a string equal to the storage configuration's type.
func (c *Core) StorageType() string {
return c.storageType
}
func (c *Core) Logger() log.Logger {
return c.logger
}
func (c *Core) BarrierKeyLength() (min, max int) {
min, max = c.barrier.KeyLength()
max += shamir.ShareOverhead
return
}
func (c *Core) AuditedHeadersConfig() *AuditedHeadersConfig {
return c.auditedHeaders
}
func waitUntilWALShippedImpl(ctx context.Context, c *Core, index uint64) bool {
return true
}
func merkleRootImpl(c *Core) string {
return ""
}
func lastWALImpl(c *Core) uint64 {
return 0
}
func lastPerformanceWALImpl(c *Core) uint64 {
return 0
}
func lastDRWALImpl(c *Core) uint64 {
return 0
}
func lastRemoteWALImpl(c *Core) uint64 {
return 0
}
func lastRemoteUpstreamWALImpl(c *Core) uint64 {
return 0
}
func (c *Core) PhysicalSealConfigs(ctx context.Context) (*SealConfig, *SealConfig, error) {
pe, err := c.physical.Get(ctx, barrierSealConfigPath)
if err != nil {
return nil, nil, fmt.Errorf("failed to fetch barrier seal configuration at migration check time: %w", err)
}
if pe == nil {
return nil, nil, nil
}
barrierConf := new(SealConfig)
if err := jsonutil.DecodeJSON(pe.Value, barrierConf); err != nil {
return nil, nil, fmt.Errorf("failed to decode barrier seal configuration at migration check time: %w", err)
}
err = barrierConf.Validate()
if err != nil {
return nil, nil, fmt.Errorf("failed to validate barrier seal configuration at migration check time: %w", err)
}
// In older versions of vault the default seal would not store a type. This
// is here to offer backwards compatibility for older seal configs.
if barrierConf.Type == "" {
barrierConf.Type = wrapping.Shamir
}
var recoveryConf *SealConfig
pe, err = c.physical.Get(ctx, recoverySealConfigPlaintextPath)
if err != nil {
return nil, nil, fmt.Errorf("failed to fetch seal configuration at migration check time: %w", err)
}
if pe != nil {
recoveryConf = &SealConfig{}
if err := jsonutil.DecodeJSON(pe.Value, recoveryConf); err != nil {
return nil, nil, fmt.Errorf("failed to decode seal configuration at migration check time: %w", err)
}
err = recoveryConf.Validate()
if err != nil {
return nil, nil, fmt.Errorf("failed to validate seal configuration at migration check time: %w", err)
}
// In older versions of vault the default seal would not store a type. This
// is here to offer backwards compatibility for older seal configs.
if recoveryConf.Type == "" {
recoveryConf.Type = wrapping.Shamir
}
}
return barrierConf, recoveryConf, nil
}
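// A hedged usage sketch for the accessor above: on uninitialized storage both
// configs come back nil with a nil error, so callers must check before use.
//
//	barrierConf, recoveryConf, err := c.PhysicalSealConfigs(ctx)
//	if err != nil {
//		return err
//	}
//	if barrierConf == nil {
//		// Fresh install: no seal configuration to migrate or validate.
//	}
//	_ = recoveryConf // may also be nil, e.g. for shamir seals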
// adjustForSealMigration takes the unwrapSeal, which is nil if (a) we're not
// configured for seal migration or (b) we might be doing a seal migration away
// from shamir. It will only be non-nil if there is a configured seal with
// the config key disabled=true, which implies a migration away from autoseal.
//
// For case (a), the common case, we expect that the stored barrier
// config matches the seal type, in which case we simply return nil. If they
// don't match, and the stored seal config is of type Shamir but the configured
// seal is not Shamir, that is case (b) and we make an unwrapSeal of type Shamir.
// Any other unwrapSeal=nil scenario is treated as an error.
//
// Given a non-nil unwrapSeal or case (b), we set up c.migrationInfo to prepare
// for a migration upon receiving a valid migration unseal request. We cannot
// check at this time for already performed (or incomplete) migrations because
// we haven't yet been unsealed, so we have no way of checking whether a
// shamir seal works to read stored seal-encrypted data.
//
// The assumption throughout is that the very last step of seal migration is
// to write the new barrier/recovery stored seal config.
func (c *Core) adjustForSealMigration(unwrapSeal Seal) error {
ctx := context.Background()
existBarrierSealConfig, existRecoverySealConfig, err := c.PhysicalSealConfigs(ctx)
if err != nil {
return fmt.Errorf("Error checking for existing seal: %s", err)
}
// If we don't have an existing config or if it's the deprecated auto seal
// which needs an upgrade, skip out
if existBarrierSealConfig == nil || existBarrierSealConfig.Type == wrapping.HSMAutoDeprecated {
return nil
}
if unwrapSeal == nil {
// With unwrapSeal==nil, either we're not migrating, or we're migrating
// from shamir.
switch {
case existBarrierSealConfig.Type == c.seal.BarrierType():
// We have the same barrier type and the unwrap seal is nil, so we're not
// migrating from same to same; in other words, assume it's not a migration.
return nil
case c.seal.BarrierType() == wrapping.Shamir:
// The stored barrier config is not shamir, there is no disabled seal
// in config, and either no configured seal (which equates to Shamir)
// or an explicitly configured Shamir seal.
return fmt.Errorf("cannot seal migrate from %q to Shamir, no disabled seal in configuration",
existBarrierSealConfig.Type)
case existBarrierSealConfig.Type == wrapping.Shamir:
// The configured seal is not Shamir, the stored seal config is Shamir.
// This is a migration away from Shamir.
unwrapSeal = NewDefaultSeal(&vaultseal.Access{
Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{
Logger: c.logger.Named("shamir"),
}),
})
default:
// We know at this point that there is a configured non-Shamir seal,
// that it does not match the stored non-Shamir seal config, and that
// there is no explicit disabled seal stanza.
return fmt.Errorf("cannot seal migrate from %q to %q, no disabled seal in configuration",
existBarrierSealConfig.Type, c.seal.BarrierType())
}
} else {
// If we're not coming from Shamir we expect the previous seal to be
// in the config and disabled.
if unwrapSeal.BarrierType() == wrapping.Shamir {
return errors.New("Shamir seals cannot be set disabled (they should simply not be set)")
}
}
// If we've reached this point it's a migration attempt and we should have both
// c.migrationInfo.seal (old seal) and c.seal (new seal) populated.
unwrapSeal.SetCore(c)
// No stored recovery seal config found, what about the legacy recovery config?
if existBarrierSealConfig.Type != wrapping.Shamir && existRecoverySealConfig == nil {
entry, err := c.physical.Get(ctx, recoverySealConfigPath)
if err != nil {
return fmt.Errorf("failed to read %q recovery seal configuration: %w", existBarrierSealConfig.Type, err)
}
if entry == nil {
return errors.New("Recovery seal configuration not found for existing seal")
}
return errors.New("Cannot migrate seals while using a legacy recovery seal config")
}
c.migrationInfo = &migrationInformation{
seal: unwrapSeal,
}
if existBarrierSealConfig.Type != c.seal.BarrierType() {
// It's unnecessary to call this when doing an auto->auto
// same-seal-type migration, since they'll have the same configs before
// and after migration.
c.adjustSealConfigDuringMigration(existBarrierSealConfig, existRecoverySealConfig)
}
c.initSealsForMigration()
c.logger.Warn("entering seal migration mode; Vault will not automatically unseal even if using an autoseal", "from_barrier_type", c.migrationInfo.seal.BarrierType(), "to_barrier_type", c.seal.BarrierType())
return nil
}
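// A hedged summary of the cases handled above, where "stored" is the barrier
// config found in storage and "configured" is the seal from the server config:
//
//	stored    configured  unwrapSeal  outcome
//	shamir    shamir      nil         no migration
//	auto X    auto X      nil         no migration
//	shamir    auto        nil         migrate shamir->auto (a shamir unwrap seal is synthesized)
//	auto X    shamir      nil         error: requires a disabled seal stanza
//	auto X    auto Y      nil         error: requires a disabled seal stanza
//	auto X    any         non-nil     migrate away from auto X (disabled=true stanza)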
func (c *Core) migrateSealConfig(ctx context.Context) error {
existBarrierSealConfig, existRecoverySealConfig, err := c.PhysicalSealConfigs(ctx)
if err != nil {
return fmt.Errorf("failed to read existing seal configuration during migration: %v", err)
}
var bc, rc *SealConfig
switch {
case c.migrationInfo.seal.RecoveryKeySupported() && c.seal.RecoveryKeySupported():
// Migrating from auto->auto, copy the configs over
bc, rc = existBarrierSealConfig, existRecoverySealConfig
case c.migrationInfo.seal.RecoveryKeySupported():
// Migrating from auto->shamir, clone auto's recovery config and set
// stored keys to 1.
bc = existRecoverySealConfig.Clone()
bc.StoredShares = 1
case c.seal.RecoveryKeySupported():
// Migrating from shamir->auto, set a new barrier config and set
// recovery config to a clone of shamir's barrier config with stored
// keys set to 0.
bc = &SealConfig{
Type: c.seal.BarrierType(),
SecretShares: 1,
SecretThreshold: 1,
StoredShares: 1,
}
rc = existBarrierSealConfig.Clone()
rc.StoredShares = 0
}
if err := c.seal.SetBarrierConfig(ctx, bc); err != nil {
return fmt.Errorf("error storing barrier config after migration: %w", err)
}
if c.seal.RecoveryKeySupported() {
if err := c.seal.SetRecoveryConfig(ctx, rc); err != nil {
return fmt.Errorf("error storing recovery config after migration: %w", err)
}
} else if err := c.physical.Delete(ctx, recoverySealConfigPlaintextPath); err != nil {
return fmt.Errorf("failed to delete old recovery seal configuration during migration: %w", err)
}
return nil
}
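
// adjustSealConfigDuringMigration primes the new seal's cached barrier and
// recovery configs from the stored configs of the old seal, so the migration
// target starts out with the correct share counts.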
func (c *Core) adjustSealConfigDuringMigration(existBarrierSealConfig, existRecoverySealConfig *SealConfig) {
switch {
case c.migrationInfo.seal.RecoveryKeySupported() && existRecoverySealConfig != nil:
// Migrating from auto->shamir, clone auto's recovery config and set
// stored keys to 1. Unless the recover config doesn't exist, in which
// case the migration is assumed to already have been performed.
newSealConfig := existRecoverySealConfig.Clone()
newSealConfig.StoredShares = 1
c.seal.SetCachedBarrierConfig(newSealConfig)
case !c.migrationInfo.seal.RecoveryKeySupported() && c.seal.RecoveryKeySupported():
// Migrating from shamir->auto, set a new barrier config and set
// recovery config to a clone of shamir's barrier config with stored
// keys set to 0.
newBarrierSealConfig := &SealConfig{
Type: c.seal.BarrierType(),
SecretShares: 1,
SecretThreshold: 1,
StoredShares: 1,
}
c.seal.SetCachedBarrierConfig(newBarrierSealConfig)
newRecoveryConfig := existBarrierSealConfig.Clone()
newRecoveryConfig.StoredShares = 0
c.seal.SetCachedRecoveryConfig(newRecoveryConfig)
}
}
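
// unsealKeyToRootKeyPostUnseal recovers the root (master) key using a
// throwaway test seal, leaving c.seal unmodified; a missing stored key is
// treated as an error.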
func (c *Core) unsealKeyToRootKeyPostUnseal(ctx context.Context, combinedKey []byte) ([]byte, error) {
return c.unsealKeyToMasterKey(ctx, c.seal, combinedKey, true, false)
}
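
// unsealKeyToMasterKeyPreUnseal recovers the root (master) key as part of the
// unseal process: the given seal may be modified (the combined key is cached
// in the Shamir case), and a missing stored key yields a nil key and nil error.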
func (c *Core) unsealKeyToMasterKeyPreUnseal(ctx context.Context, seal Seal, combinedKey []byte) ([]byte, error) {
return c.unsealKeyToMasterKey(ctx, seal, combinedKey, false, true)
}
// unsealKeyToMasterKey takes a key provided by the user, either a recovery key
// if using an autoseal or an unseal key with Shamir. It returns a nil error
// if the key is valid, and an error otherwise. It also returns the master key
// that can be used to unseal the barrier.
//
// If useTestSeal is true, the given seal will not be modified; this is used
// when not invoked as part of an unseal process. Otherwise, in the non-legacy
// Shamir case, combinedKey will be set in the seal, which means subsequent
// attempts to use the seal to read the master key will succeed, assuming
// combinedKey is valid.
//
// If allowMissing is true, a failure to find the master key in storage results
// in a nil error and a nil master key being returned.
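//
// An in-package sketch of the pre-unseal path (variable names illustrative):
//
//	rootKey, err := c.unsealKeyToMasterKeyPreUnseal(ctx, c.seal, combinedKey)
//	if err != nil {
//		return err
//	}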
func (c *Core) unsealKeyToMasterKey(ctx context.Context, seal Seal, combinedKey []byte, useTestSeal bool, allowMissing bool) ([]byte, error) {
switch seal.StoredKeysSupported() {
case vaultseal.StoredKeysSupportedGeneric:
if err := seal.VerifyRecoveryKey(ctx, combinedKey); err != nil {
return nil, fmt.Errorf("recovery key verification failed: %w", err)
}
storedKeys, err := seal.GetStoredKeys(ctx)
if storedKeys == nil && err == nil && allowMissing {
return nil, nil
}
if err == nil && len(storedKeys) != 1 {
err = fmt.Errorf("expected exactly one stored key, got %d", len(storedKeys))
}
if err != nil {
return nil, fmt.Errorf("unable to retrieve stored keys: %w", err)
}
return storedKeys[0], nil
case vaultseal.StoredKeysSupportedShamirRoot:
if useTestSeal {
testseal := NewDefaultSeal(&vaultseal.Access{
Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{
Logger: c.logger.Named("testseal"),
}),
})
testseal.SetCore(c)
cfg, err := seal.BarrierConfig(ctx)
if err != nil {
return nil, fmt.Errorf("failed to setup test barrier config: %w", err)
}
testseal.SetCachedBarrierConfig(cfg)
seal = testseal
}
err := seal.GetAccess().Wrapper.(*aeadwrapper.ShamirWrapper).SetAESGCMKeyBytes(combinedKey)
if err != nil {
return nil, fmt.Errorf("failed to setup unseal key: %w", err)
}
storedKeys, err := seal.GetStoredKeys(ctx)
if storedKeys == nil && err == nil && allowMissing {
return nil, nil
}
if err == nil && len(storedKeys) != 1 {
err = fmt.Errorf("expected exactly one stored key, got %d", len(storedKeys))
}
if err != nil {
return nil, fmt.Errorf("unable to retrieve stored keys: %w", err)
}
return storedKeys[0], nil
case vaultseal.StoredKeysNotSupported:
return combinedKey, nil
}
return nil, fmt.Errorf("invalid seal")
}
// IsInSealMigrationMode returns true if we're configured to perform a seal migration,
// meaning either that we have a disabled seal in HCL configuration or the seal
// configuration in storage is Shamir but the seal in HCL is not. In this
// mode we should not auto-unseal (even if the migration is done) and we will
// accept unseal requests with and without the `migrate` option, though the migrate
// option is required if we haven't yet performed the seal migration.
func (c *Core) IsInSealMigrationMode() bool {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
return c.migrationInfo != nil
}
// IsSealMigrated returns true if we're in seal migration mode but the
// migration has already been performed (possibly by another node, or prior to
// this node's current invocation).
func (c *Core) IsSealMigrated() bool {
if !c.IsInSealMigrationMode() {
return false
}
c.stateLock.RLock()
defer c.stateLock.RUnlock()
done, _ := c.sealMigrated(context.Background())
return done
}
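
// BarrierEncryptorAccess returns an accessor that exposes the barrier's
// encrypt/decrypt operations without exporting the barrier itself.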
func (c *Core) BarrierEncryptorAccess() *BarrierEncryptorAccess {
return NewBarrierEncryptorAccess(c.barrier)
}
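
// PhysicalAccess returns an accessor granting limited access to the
// underlying physical backend.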
func (c *Core) PhysicalAccess() *physical.PhysicalAccess {
return physical.NewPhysicalAccess(c.physical)
}
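
// RouterAccess returns an accessor for the core's request router.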
func (c *Core) RouterAccess() *RouterAccess {
return NewRouterAccess(c)
}
// IsDRSecondary returns whether the current cluster state is a DR secondary.
func (c *Core) IsDRSecondary() bool {
return c.ReplicationState().HasState(consts.ReplicationDRSecondary)
}
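
// IsPerfSecondary returns whether the current cluster state is a performance
// secondary.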
func (c *Core) IsPerfSecondary() bool {
return c.ReplicationState().HasState(consts.ReplicationPerformanceSecondary)
}
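
// AddLogger registers a logger with core so that subsequent calls to
// SetLogLevel apply to it.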
func (c *Core) AddLogger(logger log.Logger) {
c.allLoggersLock.Lock()
defer c.allLoggersLock.Unlock()
c.allLoggers = append(c.allLoggers, logger)
}
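
// SetLogLevel sets the given log level on every logger registered with core.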
func (c *Core) SetLogLevel(level log.Level) {
c.allLoggersLock.RLock()
defer c.allLoggersLock.RUnlock()
for _, logger := range c.allLoggers {
logger.SetLevel(level)
}
}
// SetConfig sets core's config object to the newly provided config.
func (c *Core) SetConfig(conf *server.Config) {
c.rawConfig.Store(conf)
bz, err := json.Marshal(c.SanitizedConfig())
if err != nil {
c.logger.Error("error serializing sanitized config", "error", err)
return
}
c.logger.Debug("set config", "sanitized config", string(bz))
}
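
// GetListenerCustomResponseHeaders returns the custom response headers
// configured for the listener with the given address, or nil if no listener
// matches.
//
// A minimal in-package sketch (the listener address is illustrative):
//
//	if h := c.GetListenerCustomResponseHeaders("127.0.0.1:8200"); h != nil {
//		// apply h to outgoing responses
//	}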
func (c *Core) GetListenerCustomResponseHeaders(listenerAddr string) *ListenerCustomHeaders {
customHeaders := c.customListenerHeader.Load()
if customHeaders == nil {
return nil
}
customHeadersList, ok := customHeaders.([]*ListenerCustomHeaders)
if customHeadersList == nil || !ok {
return nil
}
for _, l := range customHeadersList {
		if l.Address == listenerAddr {
return l
}
}
return nil
}
// ExistCustomResponseHeader checks if a custom header is configured in any
// listener's stanza
func (c *Core) ExistCustomResponseHeader(header string) bool {
customHeaders := c.customListenerHeader.Load()
if customHeaders == nil {
return false
}
customHeadersList, ok := customHeaders.([]*ListenerCustomHeaders)
if customHeadersList == nil || !ok {
return false
}
for _, l := range customHeadersList {
exist := l.ExistCustomResponseHeader(header)
if exist {
return true
}
}
return false
}
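
// ReloadCustomResponseHeaders rebuilds the per-listener custom headers from
// the current raw config and the stored UI headers.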
func (c *Core) ReloadCustomResponseHeaders() error {
conf := c.rawConfig.Load()
if conf == nil {
return fmt.Errorf("failed to load core raw config")
}
lns := conf.(*server.Config).Listeners
if lns == nil {
return fmt.Errorf("no listener configured")
}
uiHeaders, err := c.UIHeaders()
if err != nil {
return err
}
c.customListenerHeader.Store(NewListenerCustomHeader(lns, c.logger, uiHeaders))
return nil
}
// SanitizedConfig returns a sanitized version of the current config.
// See server.Config.Sanitized for specific values omitted.
func (c *Core) SanitizedConfig() map[string]interface{} {
conf := c.rawConfig.Load()
if conf == nil {
return nil
}
return conf.(*server.Config).Sanitized()
}
// LogFormat returns the log format currently in use.
func (c *Core) LogFormat() string {
conf := c.rawConfig.Load()
return conf.(*server.Config).LogFormat
}
// MetricsHelper returns the global metrics helper which allows external
// packages to access Vault's internal metrics.
func (c *Core) MetricsHelper() *metricsutil.MetricsHelper {
return c.metricsHelper
}
// MetricSink returns the metrics wrapper with which Core has been configured.
func (c *Core) MetricSink() *metricsutil.ClusterMetricSink {
return c.metricSink
}
// BuiltinRegistry is an interface that allows the "vault" package to use
// the registry of builtin plugins without getting an import cycle. It
// also allows for mocking the registry easily.
type BuiltinRegistry interface {
Contains(name string, pluginType consts.PluginType) bool
Get(name string, pluginType consts.PluginType) (func() (interface{}, error), bool)
Keys(pluginType consts.PluginType) []string
}
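
// AuditLogger returns an AuditLogger backed by this core.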
func (c *Core) AuditLogger() AuditLogger {
return &basicAuditor{c: c}
}
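
// FeatureFlags is the set of feature flags persisted in the barrier at
// consts.CoreFeatureFlagPath.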
type FeatureFlags struct {
NamespacesCubbyholesLocal bool `json:"namespace_cubbyholes_local"`
}
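
// persistFeatureFlags writes the current feature flags to the barrier.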
func (c *Core) persistFeatureFlags(ctx context.Context) error {
if !c.PR1103disabled {
c.logger.Debug("persisting feature flags")
json, err := jsonutil.EncodeJSON(&FeatureFlags{NamespacesCubbyholesLocal: !c.PR1103disabled})
if err != nil {
return err
}
return c.barrier.Put(ctx, &logical.StorageEntry{
Key: consts.CoreFeatureFlagPath,
Value: json,
})
}
return nil
}
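
// readFeatureFlags loads the persisted feature flags from the barrier,
// returning zero-valued flags if none have been stored.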
func (c *Core) readFeatureFlags(ctx context.Context) (*FeatureFlags, error) {
entry, err := c.barrier.Get(ctx, consts.CoreFeatureFlagPath)
if err != nil {
return nil, err
}
var flags FeatureFlags
if entry != nil {
err = jsonutil.DecodeJSON(entry.Value, &flags)
if err != nil {
return nil, err
}
}
return &flags, nil
}
// MatchingMount returns the path of the mount that will be responsible for
// handling the given request path.
func (c *Core) MatchingMount(ctx context.Context, reqPath string) string {
return c.router.MatchingMount(ctx, reqPath)
}
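
// setupQuotas sets up the quota manager, if one is configured.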
func (c *Core) setupQuotas(ctx context.Context, isPerfStandby bool) error {
if c.quotaManager == nil {
return nil
}
return c.quotaManager.Setup(ctx, c.systemBarrierView, isPerfStandby, c.IsDRSecondary())
}
// ApplyRateLimitQuota checks the request against all the applicable quota rules.
// If the given request's path is exempt, no rate limiting will be applied.
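//
// A minimal in-package sketch (the request path is illustrative):
//
//	resp, err := c.ApplyRateLimitQuota(ctx, &quotas.Request{Path: "secret/data/foo"})
//	if err != nil || !resp.Allowed {
//		// reject the request
//	}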
func (c *Core) ApplyRateLimitQuota(ctx context.Context, req *quotas.Request) (quotas.Response, error) {
req.Type = quotas.TypeRateLimit
resp := quotas.Response{
Allowed: true,
Headers: make(map[string]string),
}
if c.quotaManager != nil {
// skip rate limit checks for paths that are exempt from rate limiting
if c.quotaManager.RateLimitPathExempt(req.Path) {
return resp, nil
}
return c.quotaManager.ApplyQuota(ctx, req)
}
return resp, nil
}
// RateLimitAuditLoggingEnabled returns whether the quota configuration allows
// audit logging of request rejections due to rate limiting quota rule violations.
func (c *Core) RateLimitAuditLoggingEnabled() bool {
if c.quotaManager != nil {
return c.quotaManager.RateLimitAuditLoggingEnabled()
}
return false
}
// RateLimitResponseHeadersEnabled returns whether the quota configuration
// allows rate limit quota HTTP headers to be added to responses.
func (c *Core) RateLimitResponseHeadersEnabled() bool {
if c.quotaManager != nil {
return c.quotaManager.RateLimitResponseHeadersEnabled()
}
return false
}
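
// KeyRotateGracePeriod returns the barrier key rotation grace period.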
func (c *Core) KeyRotateGracePeriod() time.Duration {
return time.Duration(atomic.LoadInt64(c.keyRotateGracePeriod))
}
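
// SetKeyRotateGracePeriod updates the barrier key rotation grace period.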
func (c *Core) SetKeyRotateGracePeriod(t time.Duration) {
atomic.StoreInt64(c.keyRotateGracePeriod, int64(t))
}
// autoRotateBarrierLoop periodically checks whether the barrier key is due
// for automatic rotation.
func (c *Core) autoRotateBarrierLoop(ctx context.Context) {
t := time.NewTicker(autoRotateCheckInterval)
for {
select {
case <-t.C:
c.checkBarrierAutoRotate(ctx)
case <-ctx.Done():
t.Stop()
return
}
}
}
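
// checkBarrierAutoRotate checks, on primary clusters only, whether the
// barrier key is due for automatic rotation and triggers the rotation if so.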
func (c *Core) checkBarrierAutoRotate(ctx context.Context) {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
if c.isPrimary() {
reason, err := c.barrier.CheckBarrierAutoRotate(ctx)
if err != nil {
lf := c.logger.Error
if strings.HasSuffix(err.Error(), "context canceled") {
lf = c.logger.Debug
}
lf("error in barrier auto rotation", "error", err)
return
}
if reason != "" {
// Time to rotate. Invoke the rotation handler in order to both rotate and create
// the replication canary
c.logger.Info("automatic barrier key rotation triggered", "reason", reason)
_, err := c.systemBackend.handleRotate(ctx, nil, nil)
if err != nil {
c.logger.Error("error automatically rotating barrier key", "error", err)
} else {
metrics.IncrCounter(barrierRotationsMetric, 1)
}
}
}
}
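
// isPrimary returns whether this cluster is neither a performance secondary
// nor a DR secondary.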
func (c *Core) isPrimary() bool {
return !c.ReplicationState().HasState(consts.ReplicationPerformanceSecondary | consts.ReplicationDRSecondary)
}
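
// LicenseState describes the current state of the cluster's license.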
type LicenseState struct {
State string
ExpiryTime time.Time
Terminated bool
}
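
// MFACachedAuthResponse is a cached auth response awaiting MFA validation,
// held in the login MFA priority queue.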
type MFACachedAuthResponse struct {
CachedAuth *logical.Auth
RequestPath string
RequestNSID string
RequestNSPath string
RequestConnRemoteAddr string
TimeOfStorage time.Time
RequestID string
}
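
// setupCachedMFAResponseAuth initializes the login MFA priority queue and
// starts a goroutine that evicts expired MFA auth responses every five
// seconds until the active context is canceled.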
func (c *Core) setupCachedMFAResponseAuth() {
c.mfaResponseAuthQueueLock.Lock()
c.mfaResponseAuthQueue = NewLoginMFAPriorityQueue()
mfaQueue := c.mfaResponseAuthQueue
c.mfaResponseAuthQueueLock.Unlock()
ctx := c.activeContext
go func() {
ticker := time.Tick(5 * time.Second)
for {
select {
case <-ctx.Done():
return
case <-ticker:
err := mfaQueue.RemoveExpiredMfaAuthResponse(defaultMFAAuthResponseTTL, time.Now())
if err != nil {
c.Logger().Error("failed to remove stale MFA auth response", "error", err)
}
}
}
}()
}
// PopMFAResponseAuthByID pops an item from the mfaResponseAuthQueue by ID.
// It returns the cached auth response or an error.
func (c *Core) PopMFAResponseAuthByID(reqID string) (*MFACachedAuthResponse, error) {
c.mfaResponseAuthQueueLock.Lock()
defer c.mfaResponseAuthQueueLock.Unlock()
return c.mfaResponseAuthQueue.PopByKey(reqID)
}
// SaveMFAResponseAuth pushes an MFACachedAuthResponse to the mfaResponseAuthQueue.
// It returns an error in case of failure.
func (c *Core) SaveMFAResponseAuth(respAuth *MFACachedAuthResponse) error {
c.mfaResponseAuthQueueLock.Lock()
defer c.mfaResponseAuthQueueLock.Unlock()
return c.mfaResponseAuthQueue.Push(respAuth)
}
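
// InFlightRequests tracks the requests currently being handled along with a
// running count.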
type InFlightRequests struct {
InFlightReqMap *sync.Map
InFlightReqCount *uberAtomic.Uint64
}
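
// InFlightReqData is the snapshot of metadata recorded for a single in-flight
// request.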
type InFlightReqData struct {
StartTime time.Time `json:"start_time"`
ClientRemoteAddr string `json:"client_remote_address"`
ReqPath string `json:"request_path"`
Method string `json:"request_method"`
ClientID string `json:"client_id"`
}
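
// StoreInFlightReqData records the metadata for a newly received request and
// increments the in-flight request count.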
func (c *Core) StoreInFlightReqData(reqID string, data InFlightReqData) {
c.inFlightReqData.InFlightReqMap.Store(reqID, data)
c.inFlightReqData.InFlightReqCount.Inc()
}
// FinalizeInFlightReqData logs the completed request if the corresponding
// server config option is enabled. It also removes the request from the
// inFlightReqMap and decrements the number of in-flight requests by one.
func (c *Core) FinalizeInFlightReqData(reqID string, statusCode int) {
if c.logRequestsLevel != nil && c.logRequestsLevel.Load() != 0 {
c.LogCompletedRequests(reqID, statusCode)
}
c.inFlightReqData.InFlightReqMap.Delete(reqID)
c.inFlightReqData.InFlightReqCount.Dec()
}
// LoadInFlightReqData creates a snapshot map of the current
// in-flight requests
func (c *Core) LoadInFlightReqData() map[string]InFlightReqData {
currentInFlightReqMap := make(map[string]InFlightReqData)
c.inFlightReqData.InFlightReqMap.Range(func(key, value interface{}) bool {
// there is only one writer to this map, so skip checking for errors
v := value.(InFlightReqData)
currentInFlightReqMap[key.(string)] = v
return true
})
return currentInFlightReqMap
}
// UpdateInFlightReqData updates the data for a specific reqID with
// the clientID
func (c *Core) UpdateInFlightReqData(reqID, clientID string) {
v, ok := c.inFlightReqData.InFlightReqMap.Load(reqID)
if !ok {
c.Logger().Trace("failed to retrieve request with ID", "request_id", reqID)
return
}
// there is only one writer to this map, so skip checking for errors
reqData := v.(InFlightReqData)
reqData.ClientID = clientID
c.inFlightReqData.InFlightReqMap.Store(reqID, reqData)
}
// LogCompletedRequests logs the completed request to the server logs
func (c *Core) LogCompletedRequests(reqID string, statusCode int) {
logLevel := log.Level(c.logRequestsLevel.Load())
v, ok := c.inFlightReqData.InFlightReqMap.Load(reqID)
if !ok {
c.logger.Log(logLevel, fmt.Sprintf("failed to retrieve request with ID %v", reqID))
return
}
// there is only one writer to this map, so skip checking for errors
reqData := v.(InFlightReqData)
c.logger.Log(logLevel, "completed_request",
"start_time", reqData.StartTime.Format(time.RFC3339),
"duration", fmt.Sprintf("%dms", time.Now().Sub(reqData.StartTime).Milliseconds()),
"client_id", reqData.ClientID,
"client_address", reqData.ClientRemoteAddr, "status_code", statusCode, "request_path", reqData.ReqPath,
"request_method", reqData.Method)
}
func (c *Core) ReloadLogRequestsLevel() {
conf := c.rawConfig.Load()
if conf == nil {
return
}
infoLevel := conf.(*server.Config).LogRequestsLevel
switch {
case log.LevelFromString(infoLevel) > log.NoLevel && log.LevelFromString(infoLevel) < log.Off:
c.logRequestsLevel.Store(int32(log.LevelFromString(infoLevel)))
case infoLevel != "":
c.logger.Warn("invalid log_requests_level", "level", infoLevel)
}
}
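
// PeerNode describes an HA peer as last seen via its echo requests.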
type PeerNode struct {
Hostname string `json:"hostname"`
APIAddress string `json:"api_address"`
ClusterAddress string `json:"cluster_address"`
LastEcho time.Time `json:"last_echo"`
}
// GetHAPeerNodesCached returns the nodes that have sent us Echo requests recently.
func (c *Core) GetHAPeerNodesCached() []PeerNode {
var nodes []PeerNode
for itemClusterAddr, item := range c.clusterPeerClusterAddrsCache.Items() {
info := item.Object.(nodeHAConnectionInfo)
nodes = append(nodes, PeerNode{
Hostname: info.nodeInfo.Hostname,
APIAddress: info.nodeInfo.ApiAddr,
ClusterAddress: itemClusterAddr,
LastEcho: info.lastHeartbeat,
})
}
return nodes
}