2368 lines
69 KiB
Go
2368 lines
69 KiB
Go
package vault
|
|
|
|
import (
|
|
"context"
|
|
"crypto/ecdsa"
|
|
"crypto/subtle"
|
|
"crypto/x509"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"path/filepath"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
log "github.com/hashicorp/go-hclog"
|
|
sockaddr "github.com/hashicorp/go-sockaddr"
|
|
|
|
"google.golang.org/grpc"
|
|
|
|
"github.com/hashicorp/errwrap"
|
|
"github.com/hashicorp/go-multierror"
|
|
"github.com/hashicorp/go-uuid"
|
|
"github.com/hashicorp/vault/audit"
|
|
"github.com/hashicorp/vault/helper/consts"
|
|
"github.com/hashicorp/vault/helper/errutil"
|
|
"github.com/hashicorp/vault/helper/identity"
|
|
"github.com/hashicorp/vault/helper/jsonutil"
|
|
"github.com/hashicorp/vault/helper/logging"
|
|
"github.com/hashicorp/vault/helper/mlock"
|
|
"github.com/hashicorp/vault/helper/reload"
|
|
"github.com/hashicorp/vault/helper/tlsutil"
|
|
"github.com/hashicorp/vault/logical"
|
|
"github.com/hashicorp/vault/physical"
|
|
"github.com/hashicorp/vault/shamir"
|
|
cache "github.com/patrickmn/go-cache"
|
|
)
|
|
|
|
const (
|
|
// coreLockPath is the path used to acquire a coordinating lock
|
|
// for a highly-available deploy.
|
|
coreLockPath = "core/lock"
|
|
|
|
// The poison pill is used as a check during certain scenarios to indicate
|
|
// to standby nodes that they should seal
|
|
poisonPillPath = "core/poison-pill"
|
|
|
|
// coreLeaderPrefix is the prefix used for the UUID that contains
|
|
// the currently elected leader.
|
|
coreLeaderPrefix = "core/leader/"
|
|
|
|
// knownPrimaryAddrsPrefix is used to store last-known cluster address
|
|
// information for primaries
|
|
knownPrimaryAddrsPrefix = "core/primary-addrs/"
|
|
|
|
// lockRetryInterval is the interval we re-attempt to acquire the
|
|
// HA lock if an error is encountered
|
|
lockRetryInterval = 10 * time.Second
|
|
|
|
// leaderCheckInterval is how often a standby checks for a new leader
|
|
leaderCheckInterval = 2500 * time.Millisecond
|
|
|
|
// keyRotateCheckInterval is how often a standby checks for a key
|
|
// rotation taking place.
|
|
keyRotateCheckInterval = 30 * time.Second
|
|
|
|
// keyRotateGracePeriod is how long we allow an upgrade path
|
|
// for standby instances before we delete the upgrade keys
|
|
keyRotateGracePeriod = 2 * time.Minute
|
|
|
|
// leaderPrefixCleanDelay is how long to wait between deletions
|
|
// of orphaned leader keys, to prevent slamming the backend.
|
|
leaderPrefixCleanDelay = 200 * time.Millisecond
|
|
|
|
// coreKeyringCanaryPath is used as a canary to indicate to replicated
|
|
// clusters that they need to perform a rekey operation synchronously; this
|
|
// isn't keyring-canary to avoid ignoring it when ignoring core/keyring
|
|
coreKeyringCanaryPath = "core/canary-keyring"
|
|
)
|
|
|
|
var (
|
|
// ErrAlreadyInit is returned if the core is already
|
|
// initialized. This prevents a re-initialization.
|
|
ErrAlreadyInit = errors.New("Vault is already initialized")
|
|
|
|
// ErrNotInit is returned if a non-initialized barrier
|
|
// is attempted to be unsealed.
|
|
ErrNotInit = errors.New("Vault is not initialized")
|
|
|
|
// ErrInternalError is returned when we don't want to leak
|
|
// any information about an internal error
|
|
ErrInternalError = errors.New("internal error")
|
|
|
|
// ErrHANotEnabled is returned if the operation only makes sense
|
|
// in an HA setting
|
|
ErrHANotEnabled = errors.New("Vault is not configured for highly-available mode")
|
|
|
|
// manualStepDownSleepPeriod is how long to sleep after a user-initiated
|
|
// step down of the active node, to prevent instantly regrabbing the lock.
|
|
// It's var not const so that tests can manipulate it.
|
|
manualStepDownSleepPeriod = 10 * time.Second
|
|
|
|
// Functions only in the Enterprise version
|
|
enterprisePostUnseal = enterprisePostUnsealImpl
|
|
enterprisePreSeal = enterprisePreSealImpl
|
|
startReplication = startReplicationImpl
|
|
stopReplication = stopReplicationImpl
|
|
LastRemoteWAL = lastRemoteWALImpl
|
|
)
|
|
|
|
// NonFatalError is an error that can be returned during NewCore that should be
|
|
// displayed but not cause a program exit
|
|
type NonFatalError struct {
|
|
Err error
|
|
}
|
|
|
|
func (e *NonFatalError) WrappedErrors() []error {
|
|
return []error{e.Err}
|
|
}
|
|
|
|
func (e *NonFatalError) Error() string {
|
|
return e.Err.Error()
|
|
}
|
|
|
|
// ErrInvalidKey is returned if there is a user-based error with a provided
|
|
// unseal key. This will be shown to the user, so should not contain
|
|
// information that is sensitive.
|
|
type ErrInvalidKey struct {
|
|
Reason string
|
|
}
|
|
|
|
func (e *ErrInvalidKey) Error() string {
|
|
return fmt.Sprintf("invalid key: %v", e.Reason)
|
|
}
|
|
|
|
type activeAdvertisement struct {
|
|
RedirectAddr string `json:"redirect_addr"`
|
|
ClusterAddr string `json:"cluster_addr,omitempty"`
|
|
ClusterCert []byte `json:"cluster_cert,omitempty"`
|
|
ClusterKeyParams *clusterKeyParams `json:"cluster_key_params,omitempty"`
|
|
}
|
|
|
|
type unlockInformation struct {
|
|
Parts [][]byte
|
|
Nonce string
|
|
}
|
|
|
|
// Core is used as the central manager of Vault activity. It is the primary point of
|
|
// interface for API handlers and is responsible for managing the logical and physical
|
|
// backends, router, security barrier, and audit trails.
|
|
type Core struct {
|
|
// N.B.: This is used to populate a dev token down replication, as
|
|
// otherwise, after replication is started, a dev would have to go through
|
|
// the generate-root process simply to talk to the new follower cluster.
|
|
devToken string
|
|
|
|
// HABackend may be available depending on the physical backend
|
|
ha physical.HABackend
|
|
|
|
// redirectAddr is the address we advertise as leader if held
|
|
redirectAddr string
|
|
|
|
// clusterAddr is the address we use for clustering
|
|
clusterAddr string
|
|
|
|
// physical backend is the un-trusted backend with durable data
|
|
physical physical.Backend
|
|
|
|
// Our Seal, for seal configuration information
|
|
seal Seal
|
|
|
|
// barrier is the security barrier wrapping the physical backend
|
|
barrier SecurityBarrier
|
|
|
|
// router is responsible for managing the mount points for logical backends.
|
|
router *Router
|
|
|
|
// logicalBackends is the mapping of backends to use for this core
|
|
logicalBackends map[string]logical.Factory
|
|
|
|
// credentialBackends is the mapping of backends to use for this core
|
|
credentialBackends map[string]logical.Factory
|
|
|
|
// auditBackends is the mapping of backends to use for this core
|
|
auditBackends map[string]audit.Factory
|
|
|
|
// stateLock protects mutable state
|
|
stateLock sync.RWMutex
|
|
sealed bool
|
|
|
|
standby bool
|
|
standbyDoneCh chan struct{}
|
|
standbyStopCh chan struct{}
|
|
manualStepDownCh chan struct{}
|
|
keepHALockOnStepDown uint32
|
|
heldHALock physical.Lock
|
|
|
|
// unlockInfo has the keys provided to Unseal until the threshold number of parts is available, as well as the operation nonce
|
|
unlockInfo *unlockInformation
|
|
|
|
// generateRootProgress holds the shares until we reach enough
|
|
// to verify the master key
|
|
generateRootConfig *GenerateRootConfig
|
|
generateRootProgress [][]byte
|
|
generateRootLock sync.Mutex
|
|
|
|
// These variables holds the config and shares we have until we reach
|
|
// enough to verify the appropriate master key. Note that the same lock is
|
|
// used; this isn't time-critical so this shouldn't be a problem.
|
|
barrierRekeyConfig *SealConfig
|
|
recoveryRekeyConfig *SealConfig
|
|
rekeyLock sync.RWMutex
|
|
|
|
// mounts is loaded after unseal since it is a protected
|
|
// configuration
|
|
mounts *MountTable
|
|
|
|
// mountsLock is used to ensure that the mounts table does not
|
|
// change underneath a calling function
|
|
mountsLock sync.RWMutex
|
|
|
|
// auth is loaded after unseal since it is a protected
|
|
// configuration
|
|
auth *MountTable
|
|
|
|
// authLock is used to ensure that the auth table does not
|
|
// change underneath a calling function
|
|
authLock sync.RWMutex
|
|
|
|
// audit is loaded after unseal since it is a protected
|
|
// configuration
|
|
audit *MountTable
|
|
|
|
// auditLock is used to ensure that the audit table does not
|
|
// change underneath a calling function
|
|
auditLock sync.RWMutex
|
|
|
|
// auditBroker is used to ingest the audit events and fan
|
|
// out into the configured audit backends
|
|
auditBroker *AuditBroker
|
|
|
|
// auditedHeaders is used to configure which http headers
|
|
// can be output in the audit logs
|
|
auditedHeaders *AuditedHeadersConfig
|
|
|
|
// systemBackend is the backend which is used to manage internal operations
|
|
systemBackend *SystemBackend
|
|
|
|
// systemBarrierView is the barrier view for the system backend
|
|
systemBarrierView *BarrierView
|
|
|
|
// expiration manager is used for managing LeaseIDs,
|
|
// renewal, expiration and revocation
|
|
expiration *ExpirationManager
|
|
|
|
// rollback manager is used to run rollbacks periodically
|
|
rollback *RollbackManager
|
|
|
|
// policy store is used to manage named ACL policies
|
|
policyStore *PolicyStore
|
|
|
|
// token store is used to manage authentication tokens
|
|
tokenStore *TokenStore
|
|
|
|
// identityStore is used to manage client entities
|
|
identityStore *IdentityStore
|
|
|
|
// metricsCh is used to stop the metrics streaming
|
|
metricsCh chan struct{}
|
|
|
|
// metricsMutex is used to prevent a race condition between
|
|
// metrics emission and sealing leading to a nil pointer
|
|
metricsMutex sync.Mutex
|
|
|
|
defaultLeaseTTL time.Duration
|
|
maxLeaseTTL time.Duration
|
|
|
|
logger log.Logger
|
|
|
|
// cachingDisabled indicates whether caches are disabled
|
|
cachingDisabled bool
|
|
// Cache stores the actual cache; we always have this but may bypass it if
|
|
// disabled
|
|
physicalCache physical.ToggleablePurgemonster
|
|
|
|
// reloadFuncs is a map containing reload functions
|
|
reloadFuncs map[string][]reload.ReloadFunc
|
|
|
|
// reloadFuncsLock controls access to the funcs
|
|
reloadFuncsLock sync.RWMutex
|
|
|
|
// wrappingJWTKey is the key used for generating JWTs containing response
|
|
// wrapping information
|
|
wrappingJWTKey *ecdsa.PrivateKey
|
|
|
|
//
|
|
// Cluster information
|
|
//
|
|
// Name
|
|
clusterName string
|
|
// Specific cipher suites to use for clustering, if any
|
|
clusterCipherSuites []uint16
|
|
// Used to modify cluster parameters
|
|
clusterParamsLock sync.RWMutex
|
|
// The private key stored in the barrier used for establishing
|
|
// mutually-authenticated connections between Vault cluster members
|
|
localClusterPrivateKey *atomic.Value
|
|
// The local cluster cert
|
|
localClusterCert *atomic.Value
|
|
// The parsed form of the local cluster cert
|
|
localClusterParsedCert *atomic.Value
|
|
// The TCP addresses we should use for clustering
|
|
clusterListenerAddrs []*net.TCPAddr
|
|
// The handler to use for request forwarding
|
|
clusterHandler http.Handler
|
|
// Tracks whether cluster listeners are running, e.g. it's safe to send a
|
|
// shutdown down the channel
|
|
clusterListenersRunning bool
|
|
// Shutdown channel for the cluster listeners
|
|
clusterListenerShutdownCh chan struct{}
|
|
// Shutdown success channel. We need this to be done serially to ensure
|
|
// that binds are removed before they might be reinstated.
|
|
clusterListenerShutdownSuccessCh chan struct{}
|
|
// Write lock used to ensure that we don't have multiple connections adjust
|
|
// this value at the same time
|
|
requestForwardingConnectionLock sync.RWMutex
|
|
// Most recent leader UUID. Used to avoid repeatedly JSON parsing the same
|
|
// values.
|
|
clusterLeaderUUID string
|
|
// Most recent leader redirect addr
|
|
clusterLeaderRedirectAddr string
|
|
// Most recent leader cluster addr
|
|
clusterLeaderClusterAddr string
|
|
// Lock for the cluster leader values
|
|
clusterLeaderParamsLock sync.RWMutex
|
|
// Info on cluster members
|
|
clusterPeerClusterAddrsCache *cache.Cache
|
|
// Stores whether we currently have a server running
|
|
rpcServerActive *uint32
|
|
// The context for the client
|
|
rpcClientConnContext context.Context
|
|
// The function for canceling the client connection
|
|
rpcClientConnCancelFunc context.CancelFunc
|
|
// The grpc ClientConn for RPC calls
|
|
rpcClientConn *grpc.ClientConn
|
|
// The grpc forwarding client
|
|
rpcForwardingClient *forwardingClient
|
|
|
|
// CORS Information
|
|
corsConfig *CORSConfig
|
|
|
|
// The active set of upstream cluster addresses; stored via the Echo
|
|
// mechanism, loaded by the balancer
|
|
atomicPrimaryClusterAddrs *atomic.Value
|
|
|
|
atomicPrimaryFailoverAddrs *atomic.Value
|
|
// replicationState keeps the current replication state cached for quick
|
|
// lookup; activeNodeReplicationState stores the active value on standbys
|
|
replicationState *uint32
|
|
activeNodeReplicationState *uint32
|
|
|
|
// uiConfig contains UI configuration
|
|
uiConfig *UIConfig
|
|
|
|
// rawEnabled indicates whether the Raw endpoint is enabled
|
|
rawEnabled bool
|
|
|
|
// pluginDirectory is the location vault will look for plugin binaries
|
|
pluginDirectory string
|
|
|
|
// pluginCatalog is used to manage plugin configurations
|
|
pluginCatalog *PluginCatalog
|
|
|
|
enableMlock bool
|
|
|
|
// This can be used to trigger operations to stop running when Vault is
|
|
// going to be shut down, stepped down, or sealed
|
|
activeContext context.Context
|
|
activeContextCancelFunc context.CancelFunc
|
|
|
|
// Stores the sealunwrapper for downgrade needs
|
|
sealUnwrapper physical.Backend
|
|
|
|
// Stores any funcs that should be run on successful postUnseal
|
|
postUnsealFuncs []func()
|
|
}
|
|
|
|
// CoreConfig is used to parameterize a core
|
|
type CoreConfig struct {
|
|
DevToken string `json:"dev_token" structs:"dev_token" mapstructure:"dev_token"`
|
|
|
|
LogicalBackends map[string]logical.Factory `json:"logical_backends" structs:"logical_backends" mapstructure:"logical_backends"`
|
|
|
|
CredentialBackends map[string]logical.Factory `json:"credential_backends" structs:"credential_backends" mapstructure:"credential_backends"`
|
|
|
|
AuditBackends map[string]audit.Factory `json:"audit_backends" structs:"audit_backends" mapstructure:"audit_backends"`
|
|
|
|
Physical physical.Backend `json:"physical" structs:"physical" mapstructure:"physical"`
|
|
|
|
// May be nil, which disables HA operations
|
|
HAPhysical physical.HABackend `json:"ha_physical" structs:"ha_physical" mapstructure:"ha_physical"`
|
|
|
|
Seal Seal `json:"seal" structs:"seal" mapstructure:"seal"`
|
|
|
|
Logger log.Logger `json:"logger" structs:"logger" mapstructure:"logger"`
|
|
|
|
// Disables the LRU cache on the physical backend
|
|
DisableCache bool `json:"disable_cache" structs:"disable_cache" mapstructure:"disable_cache"`
|
|
|
|
// Disables mlock syscall
|
|
DisableMlock bool `json:"disable_mlock" structs:"disable_mlock" mapstructure:"disable_mlock"`
|
|
|
|
// Custom cache size for the LRU cache on the physical backend, or zero for default
|
|
CacheSize int `json:"cache_size" structs:"cache_size" mapstructure:"cache_size"`
|
|
|
|
// Set as the leader address for HA
|
|
RedirectAddr string `json:"redirect_addr" structs:"redirect_addr" mapstructure:"redirect_addr"`
|
|
|
|
// Set as the cluster address for HA
|
|
ClusterAddr string `json:"cluster_addr" structs:"cluster_addr" mapstructure:"cluster_addr"`
|
|
|
|
DefaultLeaseTTL time.Duration `json:"default_lease_ttl" structs:"default_lease_ttl" mapstructure:"default_lease_ttl"`
|
|
|
|
MaxLeaseTTL time.Duration `json:"max_lease_ttl" structs:"max_lease_ttl" mapstructure:"max_lease_ttl"`
|
|
|
|
ClusterName string `json:"cluster_name" structs:"cluster_name" mapstructure:"cluster_name"`
|
|
|
|
ClusterCipherSuites string `json:"cluster_cipher_suites" structs:"cluster_cipher_suites" mapstructure:"cluster_cipher_suites"`
|
|
|
|
EnableUI bool `json:"ui" structs:"ui" mapstructure:"ui"`
|
|
|
|
// Enable the raw endpoint
|
|
EnableRaw bool `json:"enable_raw" structs:"enable_raw" mapstructure:"enable_raw"`
|
|
|
|
PluginDirectory string `json:"plugin_directory" structs:"plugin_directory" mapstructure:"plugin_directory"`
|
|
|
|
ReloadFuncs *map[string][]reload.ReloadFunc
|
|
ReloadFuncsLock *sync.RWMutex
|
|
}
|
|
|
|
// NewCore is used to construct a new core
|
|
func NewCore(conf *CoreConfig) (*Core, error) {
|
|
if conf.HAPhysical != nil && conf.HAPhysical.HAEnabled() {
|
|
if conf.RedirectAddr == "" {
|
|
return nil, fmt.Errorf("missing API address, please set in configuration or via environment")
|
|
}
|
|
}
|
|
|
|
if conf.DefaultLeaseTTL == 0 {
|
|
conf.DefaultLeaseTTL = defaultLeaseTTL
|
|
}
|
|
if conf.MaxLeaseTTL == 0 {
|
|
conf.MaxLeaseTTL = maxLeaseTTL
|
|
}
|
|
if conf.DefaultLeaseTTL > conf.MaxLeaseTTL {
|
|
return nil, fmt.Errorf("cannot have DefaultLeaseTTL larger than MaxLeaseTTL")
|
|
}
|
|
|
|
// Validate the advertise addr if its given to us
|
|
if conf.RedirectAddr != "" {
|
|
u, err := url.Parse(conf.RedirectAddr)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("redirect address is not valid url: {{err}}", err)
|
|
}
|
|
|
|
if u.Scheme == "" {
|
|
return nil, fmt.Errorf("redirect address must include scheme (ex. 'http')")
|
|
}
|
|
}
|
|
|
|
// Make a default logger if not provided
|
|
if conf.Logger == nil {
|
|
conf.Logger = logging.NewVaultLogger(log.Trace)
|
|
}
|
|
|
|
// Setup the core
|
|
c := &Core{
|
|
devToken: conf.DevToken,
|
|
physical: conf.Physical,
|
|
redirectAddr: conf.RedirectAddr,
|
|
clusterAddr: conf.ClusterAddr,
|
|
seal: conf.Seal,
|
|
router: NewRouter(),
|
|
sealed: true,
|
|
standby: true,
|
|
logger: conf.Logger.Named("core"),
|
|
defaultLeaseTTL: conf.DefaultLeaseTTL,
|
|
maxLeaseTTL: conf.MaxLeaseTTL,
|
|
cachingDisabled: conf.DisableCache,
|
|
clusterName: conf.ClusterName,
|
|
clusterListenerShutdownCh: make(chan struct{}),
|
|
clusterListenerShutdownSuccessCh: make(chan struct{}),
|
|
clusterPeerClusterAddrsCache: cache.New(3*HeartbeatInterval, time.Second),
|
|
enableMlock: !conf.DisableMlock,
|
|
rawEnabled: conf.EnableRaw,
|
|
replicationState: new(uint32),
|
|
rpcServerActive: new(uint32),
|
|
atomicPrimaryClusterAddrs: new(atomic.Value),
|
|
atomicPrimaryFailoverAddrs: new(atomic.Value),
|
|
localClusterPrivateKey: new(atomic.Value),
|
|
localClusterCert: new(atomic.Value),
|
|
localClusterParsedCert: new(atomic.Value),
|
|
activeNodeReplicationState: new(uint32),
|
|
}
|
|
|
|
atomic.StoreUint32(c.replicationState, uint32(consts.ReplicationDRDisabled|consts.ReplicationPerformanceDisabled))
|
|
c.localClusterCert.Store(([]byte)(nil))
|
|
c.localClusterParsedCert.Store((*x509.Certificate)(nil))
|
|
c.localClusterPrivateKey.Store((*ecdsa.PrivateKey)(nil))
|
|
|
|
if conf.ClusterCipherSuites != "" {
|
|
suites, err := tlsutil.ParseCiphers(conf.ClusterCipherSuites)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("error parsing cluster cipher suites: {{err}}", err)
|
|
}
|
|
c.clusterCipherSuites = suites
|
|
}
|
|
|
|
// Load CORS config and provide a value for the core field.
|
|
c.corsConfig = &CORSConfig{core: c}
|
|
|
|
phys := conf.Physical
|
|
_, txnOK := conf.Physical.(physical.Transactional)
|
|
if c.seal == nil {
|
|
c.seal = NewDefaultSeal()
|
|
}
|
|
c.seal.SetCore(c)
|
|
|
|
c.sealUnwrapper = NewSealUnwrapper(phys, conf.Logger.ResetNamed("storage.sealunwrapper"))
|
|
|
|
var ok bool
|
|
|
|
// Wrap the physical backend in a cache layer if enabled
|
|
if txnOK {
|
|
c.physical = physical.NewTransactionalCache(c.sealUnwrapper, conf.CacheSize, conf.Logger.ResetNamed("storage.cache"))
|
|
} else {
|
|
c.physical = physical.NewCache(c.sealUnwrapper, conf.CacheSize, conf.Logger.ResetNamed("storage.cache"))
|
|
}
|
|
c.physicalCache = c.physical.(physical.ToggleablePurgemonster)
|
|
|
|
if !conf.DisableMlock {
|
|
// Ensure our memory usage is locked into physical RAM
|
|
if err := mlock.LockMemory(); err != nil {
|
|
return nil, fmt.Errorf(
|
|
"Failed to lock memory: %v\n\n"+
|
|
"This usually means that the mlock syscall is not available.\n"+
|
|
"Vault uses mlock to prevent memory from being swapped to\n"+
|
|
"disk. This requires root privileges as well as a machine\n"+
|
|
"that supports mlock. Please enable mlock on your system or\n"+
|
|
"disable Vault from using it. To disable Vault from using it,\n"+
|
|
"set the `disable_mlock` configuration option in your configuration\n"+
|
|
"file.",
|
|
err)
|
|
}
|
|
}
|
|
|
|
var err error
|
|
if conf.PluginDirectory != "" {
|
|
c.pluginDirectory, err = filepath.Abs(conf.PluginDirectory)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("core setup failed, could not verify plugin directory: {{err}}", err)
|
|
}
|
|
}
|
|
|
|
// Construct a new AES-GCM barrier
|
|
c.barrier, err = NewAESGCMBarrier(c.physical)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("barrier setup failed: {{err}}", err)
|
|
}
|
|
|
|
if conf.HAPhysical != nil && conf.HAPhysical.HAEnabled() {
|
|
c.ha = conf.HAPhysical
|
|
}
|
|
|
|
// We create the funcs here, then populate the given config with it so that
|
|
// the caller can share state
|
|
conf.ReloadFuncsLock = &c.reloadFuncsLock
|
|
c.reloadFuncsLock.Lock()
|
|
c.reloadFuncs = make(map[string][]reload.ReloadFunc)
|
|
c.reloadFuncsLock.Unlock()
|
|
conf.ReloadFuncs = &c.reloadFuncs
|
|
|
|
// Setup the backends
|
|
logicalBackends := make(map[string]logical.Factory)
|
|
for k, f := range conf.LogicalBackends {
|
|
logicalBackends[k] = f
|
|
}
|
|
_, ok = logicalBackends["kv"]
|
|
if !ok {
|
|
logicalBackends["kv"] = PassthroughBackendFactory
|
|
}
|
|
logicalBackends["cubbyhole"] = CubbyholeBackendFactory
|
|
logicalBackends["system"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
|
|
b := NewSystemBackend(c, conf.Logger.Named("system"))
|
|
if err := b.Setup(ctx, config); err != nil {
|
|
return nil, err
|
|
}
|
|
return b, nil
|
|
}
|
|
|
|
logicalBackends["identity"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
|
|
return NewIdentityStore(ctx, c, config, conf.Logger.Named("identity"))
|
|
}
|
|
|
|
c.logicalBackends = logicalBackends
|
|
|
|
credentialBackends := make(map[string]logical.Factory)
|
|
for k, f := range conf.CredentialBackends {
|
|
credentialBackends[k] = f
|
|
}
|
|
credentialBackends["token"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) {
|
|
return NewTokenStore(ctx, conf.Logger.Named("token"), c, config)
|
|
}
|
|
c.credentialBackends = credentialBackends
|
|
|
|
auditBackends := make(map[string]audit.Factory)
|
|
for k, f := range conf.AuditBackends {
|
|
auditBackends[k] = f
|
|
}
|
|
c.auditBackends = auditBackends
|
|
|
|
uiStoragePrefix := systemBarrierPrefix + "ui"
|
|
c.uiConfig = NewUIConfig(conf.EnableUI, physical.NewView(c.physical, uiStoragePrefix), NewBarrierView(c.barrier, uiStoragePrefix))
|
|
|
|
return c, nil
|
|
}
|
|
|
|
// Shutdown is invoked when the Vault instance is about to be terminated. It
|
|
// should not be accessible as part of an API call as it will cause an availability
|
|
// problem. It is only used to gracefully quit in the case of HA so that failover
|
|
// happens as quickly as possible.
|
|
func (c *Core) Shutdown() error {
|
|
c.logger.Debug("shutdown called")
|
|
c.stateLock.RLock()
|
|
// Tell any requests that know about this to stop
|
|
if c.activeContextCancelFunc != nil {
|
|
c.activeContextCancelFunc()
|
|
}
|
|
c.stateLock.RUnlock()
|
|
|
|
c.logger.Debug("shutdown initiating internal seal")
|
|
// Seal the Vault, causes a leader stepdown
|
|
c.stateLock.Lock()
|
|
defer c.stateLock.Unlock()
|
|
|
|
c.logger.Debug("shutdown running internal seal")
|
|
return c.sealInternal(false)
|
|
}
|
|
|
|
// CORSConfig returns the current CORS configuration
|
|
func (c *Core) CORSConfig() *CORSConfig {
|
|
return c.corsConfig
|
|
}
|
|
|
|
func (c *Core) GetContext() (context.Context, context.CancelFunc) {
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
|
|
return context.WithCancel(c.activeContext)
|
|
}
|
|
|
|
// LookupToken returns the properties of the token from the token store. This
|
|
// is particularly useful to fetch the accessor of the client token and get it
|
|
// populated in the logical request along with the client token. The accessor
|
|
// of the client token can get audit logged.
|
|
func (c *Core) LookupToken(token string) (*TokenEntry, error) {
|
|
if token == "" {
|
|
return nil, fmt.Errorf("missing client token")
|
|
}
|
|
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
if c.sealed {
|
|
return nil, consts.ErrSealed
|
|
}
|
|
if c.standby {
|
|
return nil, consts.ErrStandby
|
|
}
|
|
|
|
// Many tests don't have a token store running
|
|
if c.tokenStore == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
return c.tokenStore.Lookup(c.activeContext, token)
|
|
}
|
|
|
|
// fetchEntityAndDerivedPolicies returns the entity object for the given entity
|
|
// ID. If the entity is merged into a different entity object, the entity into
|
|
// which the given entity ID is merged into will be returned. This function
|
|
// also returns the cumulative list of policies that the entity is entitled to.
|
|
// This list includes the policies from the entity itself and from all the
|
|
// groups in which the given entity ID is a member of.
|
|
func (c *Core) fetchEntityAndDerivedPolicies(entityID string) (*identity.Entity, []string, error) {
|
|
if entityID == "" || c.identityStore == nil {
|
|
return nil, nil, nil
|
|
}
|
|
|
|
//c.logger.Debug("entity set on the token", "entity_id", te.EntityID)
|
|
|
|
// Fetch the entity
|
|
entity, err := c.identityStore.MemDBEntityByID(entityID, false)
|
|
if err != nil {
|
|
c.logger.Error("failed to lookup entity using its ID", "error", err)
|
|
return nil, nil, err
|
|
}
|
|
|
|
if entity == nil {
|
|
// If there was no corresponding entity object found, it is
|
|
// possible that the entity got merged into another entity. Try
|
|
// finding entity based on the merged entity index.
|
|
entity, err = c.identityStore.MemDBEntityByMergedEntityID(entityID, false)
|
|
if err != nil {
|
|
c.logger.Error("failed to lookup entity in merged entity ID index", "error", err)
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
|
|
var policies []string
|
|
if entity != nil {
|
|
//c.logger.Debug("entity successfully fetched; adding entity policies to token's policies to create ACL")
|
|
|
|
// Attach the policies on the entity
|
|
policies = append(policies, entity.Policies...)
|
|
|
|
groupPolicies, err := c.identityStore.groupPoliciesByEntityID(entity.ID)
|
|
if err != nil {
|
|
c.logger.Error("failed to fetch group policies", "error", err)
|
|
return nil, nil, err
|
|
}
|
|
|
|
// Attach the policies from all the groups
|
|
policies = append(policies, groupPolicies...)
|
|
}
|
|
|
|
return entity, policies, err
|
|
}
|
|
|
|
func (c *Core) fetchACLTokenEntryAndEntity(req *logical.Request) (*ACL, *TokenEntry, *identity.Entity, error) {
|
|
defer metrics.MeasureSince([]string{"core", "fetch_acl_and_token"}, time.Now())
|
|
|
|
// Ensure there is a client token
|
|
if req.ClientToken == "" {
|
|
return nil, nil, nil, fmt.Errorf("missing client token")
|
|
}
|
|
|
|
if c.tokenStore == nil {
|
|
c.logger.Error("token store is unavailable")
|
|
return nil, nil, nil, ErrInternalError
|
|
}
|
|
|
|
// Resolve the token policy
|
|
te, err := c.tokenStore.Lookup(c.activeContext, req.ClientToken)
|
|
if err != nil {
|
|
c.logger.Error("failed to lookup token", "error", err)
|
|
return nil, nil, nil, ErrInternalError
|
|
}
|
|
|
|
// Ensure the token is valid
|
|
if te == nil {
|
|
return nil, nil, nil, logical.ErrPermissionDenied
|
|
}
|
|
|
|
// CIDR checks bind all tokens except non-expiring root tokens
|
|
if te.TTL != 0 && len(te.BoundCIDRs) > 0 {
|
|
var valid bool
|
|
remoteSockAddr, err := sockaddr.NewSockAddr(req.Connection.RemoteAddr)
|
|
if err != nil {
|
|
if c.Logger().IsDebug() {
|
|
c.Logger().Debug("could not parse remote addr into sockaddr", "error", err, "remote_addr", req.Connection.RemoteAddr)
|
|
}
|
|
return nil, nil, nil, logical.ErrPermissionDenied
|
|
}
|
|
for _, cidr := range te.BoundCIDRs {
|
|
if cidr.Contains(remoteSockAddr) {
|
|
valid = true
|
|
break
|
|
}
|
|
}
|
|
if !valid {
|
|
return nil, nil, nil, logical.ErrPermissionDenied
|
|
}
|
|
}
|
|
|
|
tokenPolicies := te.Policies
|
|
|
|
entity, derivedPolicies, err := c.fetchEntityAndDerivedPolicies(te.EntityID)
|
|
if err != nil {
|
|
return nil, nil, nil, ErrInternalError
|
|
}
|
|
|
|
tokenPolicies = append(tokenPolicies, derivedPolicies...)
|
|
|
|
// Construct the corresponding ACL object
|
|
acl, err := c.policyStore.ACL(c.activeContext, tokenPolicies...)
|
|
if err != nil {
|
|
c.logger.Error("failed to construct ACL", "error", err)
|
|
return nil, nil, nil, ErrInternalError
|
|
}
|
|
|
|
return acl, te, entity, nil
|
|
}
|
|
|
|
func (c *Core) checkToken(ctx context.Context, req *logical.Request, unauth bool) (*logical.Auth, *TokenEntry, error) {
|
|
defer metrics.MeasureSince([]string{"core", "check_token"}, time.Now())
|
|
|
|
var acl *ACL
|
|
var te *TokenEntry
|
|
var entity *identity.Entity
|
|
var err error
|
|
|
|
// Even if unauth, if a token is provided, there's little reason not to
|
|
// gather as much info as possible for the audit log and to e.g. control
|
|
// trace mode for EGPs.
|
|
if !unauth || (unauth && req.ClientToken != "") {
|
|
acl, te, entity, err = c.fetchACLTokenEntryAndEntity(req)
|
|
// In the unauth case we don't want to fail the command, since it's
|
|
// unauth, we just have no information to attach to the request, so
|
|
// ignore errors...this was best-effort anyways
|
|
if err != nil && !unauth {
|
|
return nil, te, err
|
|
}
|
|
}
|
|
|
|
if entity != nil && entity.Disabled {
|
|
return nil, te, logical.ErrPermissionDenied
|
|
}
|
|
|
|
// Check if this is a root protected path
|
|
rootPath := c.router.RootPath(req.Path)
|
|
|
|
if rootPath && unauth {
|
|
return nil, nil, errors.New("cannot access root path in unauthenticated request")
|
|
}
|
|
|
|
// When we receive a write of either type, rather than require clients to
|
|
// PUT/POST and trust the operation, we ask the backend to give us the real
|
|
// skinny -- if the backend implements an existence check, it can tell us
|
|
// whether a particular resource exists. Then we can mark it as an update
|
|
// or creation as appropriate.
|
|
if req.Operation == logical.CreateOperation || req.Operation == logical.UpdateOperation {
|
|
checkExists, resourceExists, err := c.router.RouteExistenceCheck(ctx, req)
|
|
switch err {
|
|
case logical.ErrUnsupportedPath:
|
|
// fail later via bad path to avoid confusing items in the log
|
|
checkExists = false
|
|
case nil:
|
|
// Continue on
|
|
default:
|
|
c.logger.Error("failed to run existence check", "error", err)
|
|
if _, ok := err.(errutil.UserError); ok {
|
|
return nil, nil, err
|
|
} else {
|
|
return nil, nil, ErrInternalError
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case checkExists == false:
|
|
// No existence check, so always treat it as an update operation, which is how it is pre 0.5
|
|
req.Operation = logical.UpdateOperation
|
|
case resourceExists == true:
|
|
// It exists, so force an update operation
|
|
req.Operation = logical.UpdateOperation
|
|
case resourceExists == false:
|
|
// It doesn't exist, force a create operation
|
|
req.Operation = logical.CreateOperation
|
|
default:
|
|
panic("unreachable code")
|
|
}
|
|
}
|
|
// Create the auth response
|
|
auth := &logical.Auth{
|
|
ClientToken: req.ClientToken,
|
|
Accessor: req.ClientTokenAccessor,
|
|
}
|
|
|
|
if te != nil {
|
|
auth.Policies = te.Policies
|
|
auth.Metadata = te.Meta
|
|
auth.DisplayName = te.DisplayName
|
|
auth.EntityID = te.EntityID
|
|
// Store the entity ID in the request object
|
|
req.EntityID = te.EntityID
|
|
}
|
|
|
|
// Check the standard non-root ACLs. Return the token entry if it's not
|
|
// allowed so we can decrement the use count.
|
|
authResults := c.performPolicyChecks(ctx, acl, te, req, entity, &PolicyCheckOpts{
|
|
Unauth: unauth,
|
|
RootPrivsRequired: rootPath,
|
|
})
|
|
if authResults.Error.ErrorOrNil() != nil {
|
|
return auth, te, authResults.Error
|
|
}
|
|
if !authResults.Allowed {
|
|
// Return auth for audit logging even if not allowed
|
|
return auth, te, logical.ErrPermissionDenied
|
|
}
|
|
|
|
return auth, te, nil
|
|
}
|
|
|
|
// Sealed checks if the Vault is current sealed
|
|
func (c *Core) Sealed() (bool, error) {
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
return c.sealed, nil
|
|
}
|
|
|
|
// Standby checks if the Vault is in standby mode
|
|
func (c *Core) Standby() (bool, error) {
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
return c.standby, nil
|
|
}
|
|
|
|
// Leader is used to get the current active leader
|
|
func (c *Core) Leader() (isLeader bool, leaderAddr, clusterAddr string, err error) {
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
|
|
// Check if sealed
|
|
if c.sealed {
|
|
return false, "", "", consts.ErrSealed
|
|
}
|
|
|
|
// Check if HA enabled
|
|
if c.ha == nil {
|
|
return false, "", "", ErrHANotEnabled
|
|
}
|
|
|
|
// Check if we are the leader
|
|
if !c.standby {
|
|
return true, c.redirectAddr, c.clusterAddr, nil
|
|
}
|
|
|
|
// Initialize a lock
|
|
lock, err := c.ha.LockWith(coreLockPath, "read")
|
|
if err != nil {
|
|
return false, "", "", err
|
|
}
|
|
|
|
// Read the value
|
|
held, leaderUUID, err := lock.Value()
|
|
if err != nil {
|
|
return false, "", "", err
|
|
}
|
|
if !held {
|
|
return false, "", "", nil
|
|
}
|
|
|
|
c.clusterLeaderParamsLock.RLock()
|
|
localLeaderUUID := c.clusterLeaderUUID
|
|
localRedirAddr := c.clusterLeaderRedirectAddr
|
|
localClusterAddr := c.clusterLeaderClusterAddr
|
|
c.clusterLeaderParamsLock.RUnlock()
|
|
|
|
// If the leader hasn't changed, return the cached value; nothing changes
|
|
// mid-leadership, and the barrier caches anyways
|
|
if leaderUUID == localLeaderUUID && localRedirAddr != "" {
|
|
return false, localRedirAddr, localClusterAddr, nil
|
|
}
|
|
|
|
c.logger.Trace("found new active node information, refreshing")
|
|
|
|
c.clusterLeaderParamsLock.Lock()
|
|
defer c.clusterLeaderParamsLock.Unlock()
|
|
|
|
// Validate base conditions again
|
|
if leaderUUID == c.clusterLeaderUUID && c.clusterLeaderRedirectAddr != "" {
|
|
return false, localRedirAddr, localClusterAddr, nil
|
|
}
|
|
|
|
key := coreLeaderPrefix + leaderUUID
|
|
// Use background because postUnseal isn't run on standby
|
|
entry, err := c.barrier.Get(context.Background(), key)
|
|
if err != nil {
|
|
return false, "", "", err
|
|
}
|
|
if entry == nil {
|
|
return false, "", "", nil
|
|
}
|
|
|
|
var oldAdv bool
|
|
|
|
var adv activeAdvertisement
|
|
err = jsonutil.DecodeJSON(entry.Value, &adv)
|
|
if err != nil {
|
|
// Fall back to pre-struct handling
|
|
adv.RedirectAddr = string(entry.Value)
|
|
c.logger.Debug("parsed redirect addr for new active node", "redirect_addr", adv.RedirectAddr)
|
|
oldAdv = true
|
|
}
|
|
|
|
if !oldAdv {
|
|
c.logger.Debug("parsing information for new active node", "active_cluster_addr", adv.ClusterAddr, "active_redirect_addr", adv.RedirectAddr)
|
|
|
|
// Ensure we are using current values
|
|
err = c.loadLocalClusterTLS(adv)
|
|
if err != nil {
|
|
return false, "", "", err
|
|
}
|
|
|
|
// This will ensure that we both have a connection at the ready and that
|
|
// the address is the current known value
|
|
// Since this is standby, we don't use the active context. Later we may
|
|
// use a process-scoped context
|
|
err = c.refreshRequestForwardingConnection(context.Background(), adv.ClusterAddr)
|
|
if err != nil {
|
|
return false, "", "", err
|
|
}
|
|
}
|
|
|
|
// Don't set these until everything has been parsed successfully or we'll
|
|
// never try again
|
|
c.clusterLeaderRedirectAddr = adv.RedirectAddr
|
|
c.clusterLeaderClusterAddr = adv.ClusterAddr
|
|
c.clusterLeaderUUID = leaderUUID
|
|
|
|
return false, adv.RedirectAddr, adv.ClusterAddr, nil
|
|
}
|
|
|
|
// SecretProgress returns the number of keys provided so far
|
|
func (c *Core) SecretProgress() (int, string) {
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
switch c.unlockInfo {
|
|
case nil:
|
|
return 0, ""
|
|
default:
|
|
return len(c.unlockInfo.Parts), c.unlockInfo.Nonce
|
|
}
|
|
}
|
|
|
|
// ResetUnsealProcess removes the current unlock parts from memory, to reset
|
|
// the unsealing process
|
|
func (c *Core) ResetUnsealProcess() {
|
|
c.stateLock.Lock()
|
|
defer c.stateLock.Unlock()
|
|
if !c.sealed {
|
|
return
|
|
}
|
|
c.unlockInfo = nil
|
|
}
|
|
|
|
// Unseal is used to provide one of the key parts to unseal the Vault.
|
|
//
|
|
// They key given as a parameter will automatically be zerod after
|
|
// this method is done with it. If you want to keep the key around, a copy
|
|
// should be made.
|
|
func (c *Core) Unseal(key []byte) (bool, error) {
|
|
defer metrics.MeasureSince([]string{"core", "unseal"}, time.Now())
|
|
|
|
c.stateLock.Lock()
|
|
defer c.stateLock.Unlock()
|
|
|
|
ctx := context.Background()
|
|
|
|
// Explicitly check for init status. This also checks if the seal
|
|
// configuration is valid (i.e. non-nil).
|
|
init, err := c.Initialized(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if !init {
|
|
return false, ErrNotInit
|
|
}
|
|
|
|
// Verify the key length
|
|
min, max := c.barrier.KeyLength()
|
|
max += shamir.ShareOverhead
|
|
if len(key) < min {
|
|
return false, &ErrInvalidKey{fmt.Sprintf("key is shorter than minimum %d bytes", min)}
|
|
}
|
|
if len(key) > max {
|
|
return false, &ErrInvalidKey{fmt.Sprintf("key is longer than maximum %d bytes", max)}
|
|
}
|
|
|
|
// Get the barrier seal configuration
|
|
config, err := c.seal.BarrierConfig(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// Check if already unsealed
|
|
if !c.sealed {
|
|
return true, nil
|
|
}
|
|
|
|
masterKey, err := c.unsealPart(ctx, config, key, false)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if masterKey != nil {
|
|
return c.unsealInternal(ctx, masterKey)
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// UnsealWithRecoveryKeys is used to provide one of the recovery key shares to
|
|
// unseal the Vault.
|
|
func (c *Core) UnsealWithRecoveryKeys(ctx context.Context, key []byte) (bool, error) {
|
|
defer metrics.MeasureSince([]string{"core", "unseal_with_recovery_keys"}, time.Now())
|
|
|
|
c.stateLock.Lock()
|
|
defer c.stateLock.Unlock()
|
|
|
|
// Explicitly check for init status
|
|
init, err := c.Initialized(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if !init {
|
|
return false, ErrNotInit
|
|
}
|
|
|
|
var config *SealConfig
|
|
// If recovery keys are supported then use recovery seal config to unseal
|
|
if c.seal.RecoveryKeySupported() {
|
|
config, err = c.seal.RecoveryConfig(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
}
|
|
|
|
// Check if already unsealed
|
|
if !c.sealed {
|
|
return true, nil
|
|
}
|
|
|
|
masterKey, err := c.unsealPart(ctx, config, key, true)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if masterKey != nil {
|
|
return c.unsealInternal(ctx, masterKey)
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// unsealPart takes in a key share, and returns the master key if the threshold
|
|
// is met. If recovery keys are supported, recovery key shares may be provided.
|
|
func (c *Core) unsealPart(ctx context.Context, config *SealConfig, key []byte, useRecoveryKeys bool) ([]byte, error) {
|
|
// Check if we already have this piece
|
|
if c.unlockInfo != nil {
|
|
for _, existing := range c.unlockInfo.Parts {
|
|
if subtle.ConstantTimeCompare(existing, key) == 1 {
|
|
return nil, nil
|
|
}
|
|
}
|
|
} else {
|
|
uuid, err := uuid.GenerateUUID()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c.unlockInfo = &unlockInformation{
|
|
Nonce: uuid,
|
|
}
|
|
}
|
|
|
|
// Store this key
|
|
c.unlockInfo.Parts = append(c.unlockInfo.Parts, key)
|
|
|
|
// Check if we don't have enough keys to unlock, proceed through the rest of
|
|
// the call only if we have met the threshold
|
|
if len(c.unlockInfo.Parts) < config.SecretThreshold {
|
|
if c.logger.IsDebug() {
|
|
c.logger.Debug("cannot unseal, not enough keys", "keys", len(c.unlockInfo.Parts), "threshold", config.SecretThreshold, "nonce", c.unlockInfo.Nonce)
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
// Best-effort memzero of unlock parts once we're done with them
|
|
defer func() {
|
|
for i := range c.unlockInfo.Parts {
|
|
memzero(c.unlockInfo.Parts[i])
|
|
}
|
|
c.unlockInfo = nil
|
|
}()
|
|
|
|
// Recover the split key. recoveredKey is the shamir combined
|
|
// key, or the single provided key if the threshold is 1.
|
|
var recoveredKey []byte
|
|
var err error
|
|
if config.SecretThreshold == 1 {
|
|
recoveredKey = make([]byte, len(c.unlockInfo.Parts[0]))
|
|
copy(recoveredKey, c.unlockInfo.Parts[0])
|
|
} else {
|
|
recoveredKey, err = shamir.Combine(c.unlockInfo.Parts)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("failed to compute master key: {{err}}", err)
|
|
}
|
|
}
|
|
|
|
if c.seal.RecoveryKeySupported() && useRecoveryKeys {
|
|
// Verify recovery key
|
|
if err := c.seal.VerifyRecoveryKey(ctx, recoveredKey); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get stored keys and shamir combine into single master key. Unsealing with
|
|
// recovery keys currently does not support: 1) mixed stored and non-stored
|
|
// keys setup, nor 2) seals that support recovery keys but not stored keys.
|
|
// If insufficient shares are provided, shamir.Combine will error, and if
|
|
// no stored keys are found it will return masterKey as nil.
|
|
var masterKey []byte
|
|
if c.seal.StoredKeysSupported() {
|
|
masterKeyShares, err := c.seal.GetStoredKeys(ctx)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("unable to retrieve stored keys: {{err}}", err)
|
|
}
|
|
|
|
if len(masterKeyShares) == 1 {
|
|
return masterKeyShares[0], nil
|
|
}
|
|
|
|
masterKey, err = shamir.Combine(masterKeyShares)
|
|
if err != nil {
|
|
return nil, errwrap.Wrapf("failed to compute master key: {{err}}", err)
|
|
}
|
|
}
|
|
return masterKey, nil
|
|
}
|
|
|
|
// If this is not a recovery key-supported seal, then the recovered key is
|
|
// the master key to be returned.
|
|
return recoveredKey, nil
|
|
}
|
|
|
|
// unsealInternal takes in the master key and attempts to unseal the barrier.
|
|
// N.B.: This must be called with the state write lock held.
|
|
func (c *Core) unsealInternal(ctx context.Context, masterKey []byte) (bool, error) {
|
|
defer memzero(masterKey)
|
|
|
|
// Attempt to unlock
|
|
if err := c.barrier.Unseal(ctx, masterKey); err != nil {
|
|
return false, err
|
|
}
|
|
if c.logger.IsInfo() {
|
|
c.logger.Info("vault is unsealed")
|
|
}
|
|
|
|
// Do post-unseal setup if HA is not enabled
|
|
if c.ha == nil {
|
|
// We still need to set up cluster info even if it's not part of a
|
|
// cluster right now. This also populates the cached cluster object.
|
|
if err := c.setupCluster(ctx); err != nil {
|
|
c.logger.Error("cluster setup failed", "error", err)
|
|
c.barrier.Seal()
|
|
c.logger.Warn("vault is sealed")
|
|
return false, err
|
|
}
|
|
|
|
if err := c.postUnseal(); err != nil {
|
|
c.logger.Error("post-unseal setup failed", "error", err)
|
|
c.barrier.Seal()
|
|
c.logger.Warn("vault is sealed")
|
|
return false, err
|
|
}
|
|
|
|
c.standby = false
|
|
} else {
|
|
// Go to standby mode, wait until we are active to unseal
|
|
c.standbyDoneCh = make(chan struct{})
|
|
c.manualStepDownCh = make(chan struct{})
|
|
c.standbyStopCh = make(chan struct{})
|
|
go c.runStandby(c.standbyDoneCh, c.manualStepDownCh, c.standbyStopCh)
|
|
}
|
|
|
|
// Success!
|
|
c.sealed = false
|
|
|
|
// Force a cache bust here, which will also run migration code
|
|
if c.seal.RecoveryKeySupported() {
|
|
c.seal.SetRecoveryConfig(ctx, nil)
|
|
}
|
|
|
|
if c.ha != nil {
|
|
sd, ok := c.ha.(physical.ServiceDiscovery)
|
|
if ok {
|
|
if err := sd.NotifySealedStateChange(); err != nil {
|
|
if c.logger.IsWarn() {
|
|
c.logger.Warn("failed to notify unsealed status", "error", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// SealWithRequest takes in a logical.Request, acquires the lock, and passes
|
|
// through to sealInternal
|
|
func (c *Core) SealWithRequest(req *logical.Request) error {
|
|
defer metrics.MeasureSince([]string{"core", "seal-with-request"}, time.Now())
|
|
|
|
c.stateLock.RLock()
|
|
|
|
if c.sealed {
|
|
c.stateLock.RUnlock()
|
|
return nil
|
|
}
|
|
|
|
// This will unlock the read lock
|
|
// We use background context since we may not be active
|
|
return c.sealInitCommon(context.Background(), req)
|
|
}
|
|
|
|
// Seal takes in a token and creates a logical.Request, acquires the lock, and
|
|
// passes through to sealInternal
|
|
func (c *Core) Seal(token string) error {
|
|
defer metrics.MeasureSince([]string{"core", "seal"}, time.Now())
|
|
|
|
c.stateLock.RLock()
|
|
|
|
if c.sealed {
|
|
c.stateLock.RUnlock()
|
|
return nil
|
|
}
|
|
|
|
req := &logical.Request{
|
|
Operation: logical.UpdateOperation,
|
|
Path: "sys/seal",
|
|
ClientToken: token,
|
|
}
|
|
|
|
// This will unlock the read lock
|
|
// We use background context since we may not be active
|
|
return c.sealInitCommon(context.Background(), req)
|
|
}
|
|
|
|
// sealInitCommon is common logic for Seal and SealWithRequest and is used to
|
|
// re-seal the Vault. This requires the Vault to be unsealed again to perform
|
|
// any further operations. Note: this function will read-unlock the state lock.
|
|
func (c *Core) sealInitCommon(ctx context.Context, req *logical.Request) (retErr error) {
|
|
defer metrics.MeasureSince([]string{"core", "seal-internal"}, time.Now())
|
|
|
|
if req == nil {
|
|
retErr = multierror.Append(retErr, errors.New("nil request to seal"))
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
// Since there is no token store in standby nodes, sealing cannot be done.
|
|
// Ideally, the request has to be forwarded to leader node for validation
|
|
// and the operation should be performed. But for now, just returning with
|
|
// an error and recommending a vault restart, which essentially does the
|
|
// same thing.
|
|
if c.standby {
|
|
c.logger.Error("vault cannot seal when in standby mode; please restart instead")
|
|
retErr = multierror.Append(retErr, errors.New("vault cannot seal when in standby mode; please restart instead"))
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
// Validate the token is a root token
|
|
acl, te, entity, err := c.fetchACLTokenEntryAndEntity(req)
|
|
if err != nil {
|
|
retErr = multierror.Append(retErr, err)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
// Audit-log the request before going any further
|
|
auth := &logical.Auth{
|
|
ClientToken: req.ClientToken,
|
|
}
|
|
if te != nil {
|
|
auth.Policies = te.Policies
|
|
auth.Metadata = te.Meta
|
|
auth.DisplayName = te.DisplayName
|
|
auth.EntityID = te.EntityID
|
|
}
|
|
|
|
logInput := &audit.LogInput{
|
|
Auth: auth,
|
|
Request: req,
|
|
}
|
|
if err := c.auditBroker.LogRequest(ctx, logInput, c.auditedHeaders); err != nil {
|
|
c.logger.Error("failed to audit request", "request_path", req.Path, "error", err)
|
|
retErr = multierror.Append(retErr, errors.New("failed to audit request, cannot continue"))
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
if entity != nil && entity.Disabled {
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
// Attempt to use the token (decrement num_uses)
|
|
// On error bail out; if the token has been revoked, bail out too
|
|
if te != nil {
|
|
te, err = c.tokenStore.UseToken(ctx, te)
|
|
if err != nil {
|
|
c.logger.Error("failed to use token", "error", err)
|
|
retErr = multierror.Append(retErr, ErrInternalError)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
if te == nil {
|
|
// Token is no longer valid
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
}
|
|
|
|
// Verify that this operation is allowed
|
|
authResults := c.performPolicyChecks(ctx, acl, te, req, entity, &PolicyCheckOpts{
|
|
RootPrivsRequired: true,
|
|
})
|
|
if authResults.Error.ErrorOrNil() != nil {
|
|
retErr = multierror.Append(retErr, authResults.Error)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
if !authResults.Allowed {
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
if te != nil && te.NumUses == tokenRevocationPending {
|
|
// Token needs to be revoked. We do this immediately here because
|
|
// we won't have a token store after sealing.
|
|
leaseID, err := c.expiration.CreateOrFetchRevocationLeaseByToken(te)
|
|
if err == nil {
|
|
err = c.expiration.Revoke(leaseID)
|
|
}
|
|
if err != nil {
|
|
c.logger.Error("token needed revocation before seal but failed to revoke", "error", err)
|
|
retErr = multierror.Append(retErr, ErrInternalError)
|
|
}
|
|
}
|
|
|
|
// Tell any requests that know about this to stop
|
|
if c.activeContextCancelFunc != nil {
|
|
c.activeContextCancelFunc()
|
|
}
|
|
|
|
// Unlock from the request handling
|
|
c.stateLock.RUnlock()
|
|
|
|
//Seal the Vault
|
|
c.stateLock.Lock()
|
|
defer c.stateLock.Unlock()
|
|
sealErr := c.sealInternal(false)
|
|
|
|
if sealErr != nil {
|
|
retErr = multierror.Append(retErr, sealErr)
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// StepDown is used to step down from leadership
|
|
func (c *Core) StepDown(req *logical.Request) (retErr error) {
|
|
defer metrics.MeasureSince([]string{"core", "step_down"}, time.Now())
|
|
|
|
if req == nil {
|
|
retErr = multierror.Append(retErr, errors.New("nil request to step-down"))
|
|
return retErr
|
|
}
|
|
|
|
c.stateLock.RLock()
|
|
defer c.stateLock.RUnlock()
|
|
if c.sealed {
|
|
return nil
|
|
}
|
|
if c.ha == nil || c.standby {
|
|
return nil
|
|
}
|
|
|
|
ctx := c.activeContext
|
|
|
|
acl, te, entity, err := c.fetchACLTokenEntryAndEntity(req)
|
|
if err != nil {
|
|
retErr = multierror.Append(retErr, err)
|
|
return retErr
|
|
}
|
|
|
|
// Audit-log the request before going any further
|
|
auth := &logical.Auth{
|
|
ClientToken: req.ClientToken,
|
|
}
|
|
if te != nil {
|
|
auth.Policies = te.Policies
|
|
auth.Metadata = te.Meta
|
|
auth.DisplayName = te.DisplayName
|
|
auth.EntityID = te.EntityID
|
|
}
|
|
|
|
logInput := &audit.LogInput{
|
|
Auth: auth,
|
|
Request: req,
|
|
}
|
|
if err := c.auditBroker.LogRequest(ctx, logInput, c.auditedHeaders); err != nil {
|
|
c.logger.Error("failed to audit request", "request_path", req.Path, "error", err)
|
|
retErr = multierror.Append(retErr, errors.New("failed to audit request, cannot continue"))
|
|
return retErr
|
|
}
|
|
|
|
if entity != nil && entity.Disabled {
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
c.stateLock.RUnlock()
|
|
return retErr
|
|
}
|
|
|
|
// Attempt to use the token (decrement num_uses)
|
|
if te != nil {
|
|
te, err = c.tokenStore.UseToken(ctx, te)
|
|
if err != nil {
|
|
c.logger.Error("failed to use token", "error", err)
|
|
retErr = multierror.Append(retErr, ErrInternalError)
|
|
return retErr
|
|
}
|
|
if te == nil {
|
|
// Token has been revoked
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
return retErr
|
|
}
|
|
}
|
|
|
|
// Verify that this operation is allowed
|
|
authResults := c.performPolicyChecks(ctx, acl, te, req, entity, &PolicyCheckOpts{
|
|
RootPrivsRequired: true,
|
|
})
|
|
if authResults.Error.ErrorOrNil() != nil {
|
|
retErr = multierror.Append(retErr, authResults.Error)
|
|
return retErr
|
|
}
|
|
if !authResults.Allowed {
|
|
retErr = multierror.Append(retErr, logical.ErrPermissionDenied)
|
|
return retErr
|
|
}
|
|
|
|
if te != nil && te.NumUses == tokenRevocationPending {
|
|
// Token needs to be revoked. We do this immediately here because
|
|
// we won't have a token store after sealing.
|
|
leaseID, err := c.expiration.CreateOrFetchRevocationLeaseByToken(te)
|
|
if err == nil {
|
|
err = c.expiration.Revoke(leaseID)
|
|
}
|
|
if err != nil {
|
|
c.logger.Error("token needed revocation before step-down but failed to revoke", "error", err)
|
|
retErr = multierror.Append(retErr, ErrInternalError)
|
|
}
|
|
}
|
|
|
|
select {
|
|
case c.manualStepDownCh <- struct{}{}:
|
|
default:
|
|
c.logger.Warn("manual step-down operation already queued")
|
|
}
|
|
|
|
return retErr
|
|
}
|
|
|
|
// UIEnabled returns if the UI is enabled
|
|
func (c *Core) UIEnabled() bool {
|
|
return c.uiConfig.Enabled()
|
|
}
|
|
|
|
// UIHeaders returns configured UI headers
|
|
func (c *Core) UIHeaders() (http.Header, error) {
|
|
return c.uiConfig.Headers(context.Background())
|
|
}
|
|
|
|
// sealInternal is an internal method used to seal the vault. It does not do
|
|
// any authorization checking. The stateLock must be held prior to calling.
|
|
func (c *Core) sealInternal(keepLock bool) error {
|
|
if c.sealed {
|
|
return nil
|
|
}
|
|
|
|
// Enable that we are sealed to prevent further transactions
|
|
c.sealed = true
|
|
|
|
c.logger.Debug("marked as sealed")
|
|
|
|
// Clear forwarding clients
|
|
c.requestForwardingConnectionLock.Lock()
|
|
c.clearForwardingClients()
|
|
c.requestForwardingConnectionLock.Unlock()
|
|
|
|
// Do pre-seal teardown if HA is not enabled
|
|
if c.ha == nil {
|
|
// Even in a non-HA context we key off of this for some things
|
|
c.standby = true
|
|
if err := c.preSeal(); err != nil {
|
|
c.logger.Error("pre-seal teardown failed", "error", err)
|
|
return fmt.Errorf("internal error")
|
|
}
|
|
} else {
|
|
if keepLock {
|
|
atomic.StoreUint32(&c.keepHALockOnStepDown, 1)
|
|
}
|
|
// If we are trying to acquire the lock, force it to return with nil so
|
|
// runStandby will exit
|
|
// If we are active, signal the standby goroutine to shut down and wait
|
|
// for completion. We have the state lock here so nothing else should
|
|
// be toggling standby status.
|
|
close(c.standbyStopCh)
|
|
c.logger.Debug("finished triggering standbyStopCh for runStandby")
|
|
|
|
// Wait for runStandby to stop
|
|
<-c.standbyDoneCh
|
|
atomic.StoreUint32(&c.keepHALockOnStepDown, 0)
|
|
c.logger.Debug("runStandby done")
|
|
}
|
|
|
|
c.logger.Debug("sealing barrier")
|
|
if err := c.barrier.Seal(); err != nil {
|
|
c.logger.Error("error sealing barrier", "error", err)
|
|
return err
|
|
}
|
|
|
|
if c.ha != nil {
|
|
sd, ok := c.ha.(physical.ServiceDiscovery)
|
|
if ok {
|
|
if err := sd.NotifySealedStateChange(); err != nil {
|
|
if c.logger.IsWarn() {
|
|
c.logger.Warn("failed to notify sealed status", "error", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
c.logger.Info("vault is sealed")
|
|
|
|
return nil
|
|
}
|
|
|
|
// postUnseal is invoked after the barrier is unsealed, but before
|
|
// allowing any user operations. This allows us to setup any state that
|
|
// requires the Vault to be unsealed such as mount tables, logical backends,
|
|
// credential stores, etc.
|
|
func (c *Core) postUnseal() (retErr error) {
|
|
defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now())
|
|
|
|
// Clear any out
|
|
c.postUnsealFuncs = nil
|
|
|
|
// Create a new request context
|
|
c.activeContext, c.activeContextCancelFunc = context.WithCancel(context.Background())
|
|
|
|
defer func() {
|
|
if retErr != nil {
|
|
c.activeContextCancelFunc()
|
|
c.preSeal()
|
|
}
|
|
}()
|
|
c.logger.Info("post-unseal setup starting")
|
|
|
|
// Clear forwarding clients; we're active
|
|
c.requestForwardingConnectionLock.Lock()
|
|
c.clearForwardingClients()
|
|
c.requestForwardingConnectionLock.Unlock()
|
|
|
|
// Enable the cache
|
|
c.physicalCache.Purge(c.activeContext)
|
|
if !c.cachingDisabled {
|
|
c.physicalCache.SetEnabled(true)
|
|
}
|
|
|
|
switch c.sealUnwrapper.(type) {
|
|
case *sealUnwrapper:
|
|
c.sealUnwrapper.(*sealUnwrapper).runUnwraps()
|
|
case *transactionalSealUnwrapper:
|
|
c.sealUnwrapper.(*transactionalSealUnwrapper).runUnwraps()
|
|
}
|
|
|
|
// Purge these for safety in case of a rekey
|
|
c.seal.SetBarrierConfig(c.activeContext, nil)
|
|
if c.seal.RecoveryKeySupported() {
|
|
c.seal.SetRecoveryConfig(c.activeContext, nil)
|
|
}
|
|
|
|
if err := enterprisePostUnseal(c); err != nil {
|
|
return err
|
|
}
|
|
if err := c.ensureWrappingKey(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupPluginCatalog(); err != nil {
|
|
return err
|
|
}
|
|
if err := c.loadMounts(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupMounts(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupPolicyStore(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.loadCORSConfig(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.loadCredentials(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupCredentials(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.startRollback(); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupExpiration(); err != nil {
|
|
return err
|
|
}
|
|
if err := c.loadAudits(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupAudits(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.loadIdentityStoreArtifacts(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
if err := c.setupAuditedHeadersConfig(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
|
|
if c.ha != nil {
|
|
if err := c.startClusterListener(c.activeContext); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
c.metricsCh = make(chan struct{})
|
|
go c.emitMetrics(c.metricsCh)
|
|
|
|
// This is intentionally the last block in this function. We want to allow
|
|
// writes just before allowing client requests, to ensure everything has
|
|
// been set up properly before any writes can have happened.
|
|
for _, v := range c.postUnsealFuncs {
|
|
v()
|
|
}
|
|
|
|
c.logger.Info("post-unseal setup complete")
|
|
return nil
|
|
}
|
|
|
|
// preSeal is invoked before the barrier is sealed, allowing
|
|
// for any state teardown required.
|
|
func (c *Core) preSeal() error {
|
|
defer metrics.MeasureSince([]string{"core", "pre_seal"}, time.Now())
|
|
c.logger.Info("pre-seal teardown starting")
|
|
|
|
// Clear any pending funcs
|
|
c.postUnsealFuncs = nil
|
|
|
|
// Clear any rekey progress
|
|
c.barrierRekeyConfig = nil
|
|
c.recoveryRekeyConfig = nil
|
|
|
|
if c.metricsCh != nil {
|
|
close(c.metricsCh)
|
|
c.metricsCh = nil
|
|
}
|
|
var result error
|
|
|
|
c.stopClusterListener()
|
|
|
|
if err := c.teardownAudits(); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error tearing down audits: {{err}}", err))
|
|
}
|
|
if err := c.stopExpiration(); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error stopping expiration: {{err}}", err))
|
|
}
|
|
if err := c.teardownCredentials(c.activeContext); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error tearing down credentials: {{err}}", err))
|
|
}
|
|
if err := c.teardownPolicyStore(); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error tearing down policy store: {{err}}", err))
|
|
}
|
|
if err := c.stopRollback(); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error stopping rollback: {{err}}", err))
|
|
}
|
|
if err := c.unloadMounts(c.activeContext); err != nil {
|
|
result = multierror.Append(result, errwrap.Wrapf("error unloading mounts: {{err}}", err))
|
|
}
|
|
if err := enterprisePreSeal(c); err != nil {
|
|
result = multierror.Append(result, err)
|
|
}
|
|
|
|
switch c.sealUnwrapper.(type) {
|
|
case *sealUnwrapper:
|
|
c.sealUnwrapper.(*sealUnwrapper).stopUnwraps()
|
|
case *transactionalSealUnwrapper:
|
|
c.sealUnwrapper.(*transactionalSealUnwrapper).stopUnwraps()
|
|
}
|
|
|
|
// Purge the cache
|
|
c.physicalCache.SetEnabled(false)
|
|
c.physicalCache.Purge(c.activeContext)
|
|
|
|
c.logger.Info("pre-seal teardown complete")
|
|
return result
|
|
}
|
|
|
|
func enterprisePostUnsealImpl(c *Core) error {
|
|
return nil
|
|
}
|
|
|
|
func enterprisePreSealImpl(c *Core) error {
|
|
return nil
|
|
}
|
|
|
|
func startReplicationImpl(c *Core) error {
|
|
return nil
|
|
}
|
|
|
|
func stopReplicationImpl(c *Core) error {
|
|
return nil
|
|
}
|
|
|
|
// runStandby is a long running routine that is used when an HA backend
|
|
// is enabled. It waits until we are leader and switches this Vault to
|
|
// active.
|
|
func (c *Core) runStandby(doneCh, manualStepDownCh, stopCh chan struct{}) {
|
|
defer close(doneCh)
|
|
defer close(manualStepDownCh)
|
|
c.logger.Info("entering standby mode")
|
|
|
|
// Monitor for key rotation
|
|
keyRotateDone := make(chan struct{})
|
|
keyRotateStop := make(chan struct{})
|
|
go c.periodicCheckKeyUpgrade(context.Background(), keyRotateDone, keyRotateStop)
|
|
// Monitor for new leadership
|
|
checkLeaderDone := make(chan struct{})
|
|
checkLeaderStop := make(chan struct{})
|
|
go c.periodicLeaderRefresh(checkLeaderDone, checkLeaderStop)
|
|
defer func() {
|
|
c.logger.Debug("closed periodic key rotation checker stop channel")
|
|
close(keyRotateStop)
|
|
<-keyRotateDone
|
|
close(checkLeaderStop)
|
|
c.logger.Debug("closed periodic leader refresh stop channel")
|
|
<-checkLeaderDone
|
|
c.logger.Debug("periodic leader refresh returned")
|
|
}()
|
|
|
|
var manualStepDown bool
|
|
for {
|
|
// Check for a shutdown
|
|
select {
|
|
case <-stopCh:
|
|
c.logger.Debug("stop channel triggered in runStandby")
|
|
return
|
|
default:
|
|
// If we've just down, we could instantly grab the lock again. Give
|
|
// the other nodes a chance.
|
|
if manualStepDown {
|
|
time.Sleep(manualStepDownSleepPeriod)
|
|
manualStepDown = false
|
|
}
|
|
}
|
|
|
|
// Create a lock
|
|
uuid, err := uuid.GenerateUUID()
|
|
if err != nil {
|
|
c.logger.Error("failed to generate uuid", "error", err)
|
|
return
|
|
}
|
|
lock, err := c.ha.LockWith(coreLockPath, uuid)
|
|
if err != nil {
|
|
c.logger.Error("failed to create lock", "error", err)
|
|
return
|
|
}
|
|
|
|
// Attempt the acquisition
|
|
leaderLostCh := c.acquireLock(lock, stopCh)
|
|
|
|
// Bail if we are being shutdown
|
|
if leaderLostCh == nil {
|
|
return
|
|
}
|
|
c.logger.Info("acquired lock, enabling active operation")
|
|
|
|
// This is used later to log a metrics event; this can be helpful to
|
|
// detect flapping
|
|
activeTime := time.Now()
|
|
|
|
// Grab the lock as we need it for cluster setup, which needs to happen
|
|
// before advertising;
|
|
|
|
lockGrabbedCh := make(chan struct{})
|
|
go func() {
|
|
// Grab the lock
|
|
c.stateLock.Lock()
|
|
// If stopCh has been closed, which only happens while the
|
|
// stateLock is held, we have actually terminated, so we just
|
|
// instantly give up the lock, otherwise we notify that it's ready
|
|
// for consumption
|
|
select {
|
|
case <-stopCh:
|
|
c.stateLock.Unlock()
|
|
default:
|
|
close(lockGrabbedCh)
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case <-stopCh:
|
|
lock.Unlock()
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
return
|
|
case <-lockGrabbedCh:
|
|
// We now have the lock and can use it
|
|
}
|
|
|
|
if c.sealed {
|
|
c.logger.Warn("grabbed HA lock but already sealed, exiting")
|
|
lock.Unlock()
|
|
c.stateLock.Unlock()
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
return
|
|
}
|
|
|
|
// Store the lock so that we can manually clear it later if needed
|
|
c.heldHALock = lock
|
|
|
|
// We haven't run postUnseal yet so we have nothing meaningful to use here
|
|
ctx := context.Background()
|
|
|
|
// This block is used to wipe barrier/seal state and verify that
|
|
// everything is sane. If we have no sanity in the barrier, we actually
|
|
// seal, as there's little we can do.
|
|
{
|
|
c.seal.SetBarrierConfig(ctx, nil)
|
|
if c.seal.RecoveryKeySupported() {
|
|
c.seal.SetRecoveryConfig(ctx, nil)
|
|
}
|
|
|
|
if err := c.performKeyUpgrades(ctx); err != nil {
|
|
// We call this in a goroutine so that we can give up the
|
|
// statelock and have this shut us down; sealInternal has a
|
|
// workflow where it watches for the stopCh to close so we want
|
|
// to return from here
|
|
c.logger.Error("error performing key upgrades", "error", err)
|
|
go c.Shutdown()
|
|
c.heldHALock = nil
|
|
lock.Unlock()
|
|
c.stateLock.Unlock()
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Clear previous local cluster cert info so we generate new. Since the
|
|
// UUID will have changed, standbys will know to look for new info
|
|
c.localClusterParsedCert.Store((*x509.Certificate)(nil))
|
|
c.localClusterCert.Store(([]byte)(nil))
|
|
c.localClusterPrivateKey.Store((*ecdsa.PrivateKey)(nil))
|
|
|
|
if err := c.setupCluster(ctx); err != nil {
|
|
c.heldHALock = nil
|
|
lock.Unlock()
|
|
c.stateLock.Unlock()
|
|
c.logger.Error("cluster setup failed", "error", err)
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
continue
|
|
}
|
|
|
|
// Advertise as leader
|
|
if err := c.advertiseLeader(ctx, uuid, leaderLostCh); err != nil {
|
|
c.heldHALock = nil
|
|
lock.Unlock()
|
|
c.stateLock.Unlock()
|
|
c.logger.Error("leader advertisement setup failed", "error", err)
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
continue
|
|
}
|
|
|
|
// Attempt the post-unseal process
|
|
err = c.postUnseal()
|
|
if err == nil {
|
|
c.standby = false
|
|
}
|
|
|
|
c.stateLock.Unlock()
|
|
|
|
// Handle a failure to unseal
|
|
if err != nil {
|
|
c.logger.Error("post-unseal setup failed", "error", err)
|
|
lock.Unlock()
|
|
metrics.MeasureSince([]string{"core", "leadership_setup_failed"}, activeTime)
|
|
continue
|
|
}
|
|
|
|
// Monitor a loss of leadership
|
|
releaseHALock := true
|
|
grabStateLock := true
|
|
select {
|
|
case <-leaderLostCh:
|
|
c.logger.Warn("leadership lost, stopping active operation")
|
|
case <-stopCh:
|
|
// This case comes from sealInternal; we will already be having the
|
|
// state lock held so we do toggle grabStateLock to false
|
|
if atomic.LoadUint32(&c.keepHALockOnStepDown) == 1 {
|
|
releaseHALock = false
|
|
}
|
|
grabStateLock = false
|
|
case <-manualStepDownCh:
|
|
c.logger.Warn("stepping down from active operation to standby")
|
|
manualStepDown = true
|
|
}
|
|
|
|
metrics.MeasureSince([]string{"core", "leadership_lost"}, activeTime)
|
|
|
|
// Tell any requests that know about this to stop
|
|
if c.activeContextCancelFunc != nil {
|
|
c.activeContextCancelFunc()
|
|
}
|
|
|
|
// Attempt the pre-seal process
|
|
if grabStateLock {
|
|
c.stateLock.Lock()
|
|
}
|
|
c.standby = true
|
|
preSealErr := c.preSeal()
|
|
if grabStateLock {
|
|
c.stateLock.Unlock()
|
|
}
|
|
|
|
if releaseHALock {
|
|
if err := c.clearLeader(uuid); err != nil {
|
|
c.logger.Error("clearing leader advertisement failed", "error", err)
|
|
}
|
|
c.heldHALock.Unlock()
|
|
c.heldHALock = nil
|
|
}
|
|
|
|
// Check for a failure to prepare to seal
|
|
if preSealErr != nil {
|
|
c.logger.Error("pre-seal teardown failed", "error", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// This checks the leader periodically to ensure that we switch RPC to a new
|
|
// leader pretty quickly. There is logic in Leader() already to not make this
|
|
// onerous and avoid more traffic than needed, so we just call that and ignore
|
|
// the result.
|
|
func (c *Core) periodicLeaderRefresh(doneCh, stopCh chan struct{}) {
|
|
defer close(doneCh)
|
|
var opCount int32
|
|
for {
|
|
select {
|
|
case <-time.After(leaderCheckInterval):
|
|
count := atomic.AddInt32(&opCount, 1)
|
|
if count > 1 {
|
|
atomic.AddInt32(&opCount, -1)
|
|
continue
|
|
}
|
|
// We do this in a goroutine because otherwise if this refresh is
|
|
// called while we're shutting down the call to Leader() can
|
|
// deadlock, which then means stopCh can never been seen and we can
|
|
// block shutdown
|
|
go func() {
|
|
defer atomic.AddInt32(&opCount, -1)
|
|
c.Leader()
|
|
}()
|
|
case <-stopCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// periodicCheckKeyUpgrade is used to watch for key rotation events as a standby
|
|
func (c *Core) periodicCheckKeyUpgrade(ctx context.Context, doneCh, stopCh chan struct{}) {
|
|
defer close(doneCh)
|
|
var opCount int32
|
|
for {
|
|
select {
|
|
case <-time.After(keyRotateCheckInterval):
|
|
count := atomic.AddInt32(&opCount, 1)
|
|
if count > 1 {
|
|
atomic.AddInt32(&opCount, -1)
|
|
continue
|
|
}
|
|
|
|
go func() {
|
|
defer atomic.AddInt32(&opCount, -1)
|
|
// Only check if we are a standby
|
|
c.stateLock.RLock()
|
|
standby := c.standby
|
|
c.stateLock.RUnlock()
|
|
if !standby {
|
|
return
|
|
}
|
|
|
|
// Check for a poison pill. If we can read it, it means we have stale
|
|
// keys (e.g. from replication being activated) and we need to seal to
|
|
// be unsealed again.
|
|
entry, _ := c.barrier.Get(ctx, poisonPillPath)
|
|
if entry != nil && len(entry.Value) > 0 {
|
|
c.logger.Warn("encryption keys have changed out from underneath us (possibly due to replication enabling), must be unsealed again")
|
|
go c.Shutdown()
|
|
return
|
|
}
|
|
|
|
if err := c.checkKeyUpgrades(ctx); err != nil {
|
|
c.logger.Error("key rotation periodic upgrade check failed", "error", err)
|
|
}
|
|
}()
|
|
case <-stopCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// checkKeyUpgrades is used to check if there have been any key rotations
|
|
// and if there is a chain of upgrades available
|
|
func (c *Core) checkKeyUpgrades(ctx context.Context) error {
|
|
for {
|
|
// Check for an upgrade
|
|
didUpgrade, newTerm, err := c.barrier.CheckUpgrade(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Nothing to do if no upgrade
|
|
if !didUpgrade {
|
|
break
|
|
}
|
|
if c.logger.IsInfo() {
|
|
c.logger.Info("upgraded to new key term", "term", newTerm)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// scheduleUpgradeCleanup is used to ensure that all the upgrade paths
|
|
// are cleaned up in a timely manner if a leader failover takes place
|
|
func (c *Core) scheduleUpgradeCleanup(ctx context.Context) error {
|
|
// List the upgrades
|
|
upgrades, err := c.barrier.List(ctx, keyringUpgradePrefix)
|
|
if err != nil {
|
|
return errwrap.Wrapf("failed to list upgrades: {{err}}", err)
|
|
}
|
|
|
|
// Nothing to do if no upgrades
|
|
if len(upgrades) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Schedule cleanup for all of them
|
|
time.AfterFunc(keyRotateGracePeriod, func() {
|
|
sealed, err := c.barrier.Sealed()
|
|
if err != nil {
|
|
c.logger.Warn("failed to check barrier status at upgrade cleanup time")
|
|
return
|
|
}
|
|
if sealed {
|
|
c.logger.Warn("barrier sealed at upgrade cleanup time")
|
|
return
|
|
}
|
|
for _, upgrade := range upgrades {
|
|
path := fmt.Sprintf("%s%s", keyringUpgradePrefix, upgrade)
|
|
if err := c.barrier.Delete(ctx, path); err != nil {
|
|
c.logger.Error("failed to cleanup upgrade", "path", path, "error", err)
|
|
}
|
|
}
|
|
})
|
|
return nil
|
|
}
|
|
|
|
func (c *Core) performKeyUpgrades(ctx context.Context) error {
|
|
if err := c.checkKeyUpgrades(ctx); err != nil {
|
|
return errwrap.Wrapf("error checking for key upgrades: {{err}}", err)
|
|
}
|
|
|
|
if err := c.barrier.ReloadMasterKey(ctx); err != nil {
|
|
return errwrap.Wrapf("error reloading master key: {{err}}", err)
|
|
}
|
|
|
|
if err := c.barrier.ReloadKeyring(ctx); err != nil {
|
|
return errwrap.Wrapf("error reloading keyring: {{err}}", err)
|
|
}
|
|
|
|
if err := c.scheduleUpgradeCleanup(ctx); err != nil {
|
|
return errwrap.Wrapf("error scheduling upgrade cleanup: {{err}}", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// acquireLock blocks until the lock is acquired, returning the leaderLostCh
|
|
func (c *Core) acquireLock(lock physical.Lock, stopCh <-chan struct{}) <-chan struct{} {
|
|
for {
|
|
// Attempt lock acquisition
|
|
leaderLostCh, err := lock.Lock(stopCh)
|
|
if err == nil {
|
|
return leaderLostCh
|
|
}
|
|
|
|
// Retry the acquisition
|
|
c.logger.Error("failed to acquire lock", "error", err)
|
|
select {
|
|
case <-time.After(lockRetryInterval):
|
|
case <-stopCh:
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// advertiseLeader is used to advertise the current node as leader
|
|
func (c *Core) advertiseLeader(ctx context.Context, uuid string, leaderLostCh <-chan struct{}) error {
|
|
go c.cleanLeaderPrefix(ctx, uuid, leaderLostCh)
|
|
|
|
var key *ecdsa.PrivateKey
|
|
switch c.localClusterPrivateKey.Load().(type) {
|
|
case *ecdsa.PrivateKey:
|
|
key = c.localClusterPrivateKey.Load().(*ecdsa.PrivateKey)
|
|
default:
|
|
c.logger.Error("unknown cluster private key type", "key_type", fmt.Sprintf("%T", c.localClusterPrivateKey.Load()))
|
|
return fmt.Errorf("unknown cluster private key type %T", c.localClusterPrivateKey.Load())
|
|
}
|
|
|
|
keyParams := &clusterKeyParams{
|
|
Type: corePrivateKeyTypeP521,
|
|
X: key.X,
|
|
Y: key.Y,
|
|
D: key.D,
|
|
}
|
|
|
|
locCert := c.localClusterCert.Load().([]byte)
|
|
localCert := make([]byte, len(locCert))
|
|
copy(localCert, locCert)
|
|
adv := &activeAdvertisement{
|
|
RedirectAddr: c.redirectAddr,
|
|
ClusterAddr: c.clusterAddr,
|
|
ClusterCert: localCert,
|
|
ClusterKeyParams: keyParams,
|
|
}
|
|
val, err := jsonutil.EncodeJSON(adv)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ent := &Entry{
|
|
Key: coreLeaderPrefix + uuid,
|
|
Value: val,
|
|
}
|
|
err = c.barrier.Put(ctx, ent)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
sd, ok := c.ha.(physical.ServiceDiscovery)
|
|
if ok {
|
|
if err := sd.NotifyActiveStateChange(); err != nil {
|
|
if c.logger.IsWarn() {
|
|
c.logger.Warn("failed to notify active status", "error", err)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Core) cleanLeaderPrefix(ctx context.Context, uuid string, leaderLostCh <-chan struct{}) {
|
|
keys, err := c.barrier.List(ctx, coreLeaderPrefix)
|
|
if err != nil {
|
|
c.logger.Error("failed to list entries in core/leader", "error", err)
|
|
return
|
|
}
|
|
for len(keys) > 0 {
|
|
select {
|
|
case <-time.After(leaderPrefixCleanDelay):
|
|
if keys[0] != uuid {
|
|
c.barrier.Delete(ctx, coreLeaderPrefix+keys[0])
|
|
}
|
|
keys = keys[1:]
|
|
case <-leaderLostCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// clearLeader is used to clear our leadership entry
|
|
func (c *Core) clearLeader(uuid string) error {
|
|
key := coreLeaderPrefix + uuid
|
|
err := c.barrier.Delete(c.activeContext, key)
|
|
|
|
// Advertise ourselves as a standby
|
|
sd, ok := c.ha.(physical.ServiceDiscovery)
|
|
if ok {
|
|
if err := sd.NotifyActiveStateChange(); err != nil {
|
|
if c.logger.IsWarn() {
|
|
c.logger.Warn("failed to notify standby status", "error", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// emitMetrics is used to periodically expose metrics while running
|
|
func (c *Core) emitMetrics(stopCh chan struct{}) {
|
|
for {
|
|
select {
|
|
case <-time.After(time.Second):
|
|
c.metricsMutex.Lock()
|
|
if c.expiration != nil {
|
|
c.expiration.emitMetrics()
|
|
}
|
|
c.metricsMutex.Unlock()
|
|
case <-stopCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *Core) ReplicationState() consts.ReplicationState {
|
|
return consts.ReplicationState(atomic.LoadUint32(c.replicationState))
|
|
}
|
|
|
|
func (c *Core) ActiveNodeReplicationState() consts.ReplicationState {
|
|
return consts.ReplicationState(atomic.LoadUint32(c.activeNodeReplicationState))
|
|
}
|
|
|
|
func (c *Core) SealAccess() *SealAccess {
|
|
return NewSealAccess(c.seal)
|
|
}
|
|
|
|
func (c *Core) Logger() log.Logger {
|
|
return c.logger
|
|
}
|
|
|
|
func (c *Core) BarrierKeyLength() (min, max int) {
|
|
min, max = c.barrier.KeyLength()
|
|
max += shamir.ShareOverhead
|
|
return
|
|
}
|
|
|
|
func (c *Core) AuditedHeadersConfig() *AuditedHeadersConfig {
|
|
return c.auditedHeaders
|
|
}
|
|
|
|
func lastRemoteWALImpl(c *Core) uint64 {
|
|
return 0
|
|
}
|
|
|
|
func (c *Core) BarrierEncryptorAccess() *BarrierEncryptorAccess {
|
|
return NewBarrierEncryptorAccess(c.barrier)
|
|
}
|
|
|
|
func (c *Core) PhysicalAccess() *physical.PhysicalAccess {
|
|
return physical.NewPhysicalAccess(c.physical)
|
|
}
|
|
|
|
func (c *Core) RouterAccess() *RouterAccess {
|
|
return NewRouterAccess(c)
|
|
}
|
|
|
|
// IsDRSecondary returns if the current cluster state is a DR secondary.
|
|
func (c *Core) IsDRSecondary() bool {
|
|
return c.ReplicationState().HasState(consts.ReplicationDRSecondary)
|
|
}
|