ebe333b947
* convert tableIndex to use the new pattern * make `indexFromString` available for oss as well * refactor `indexUpdateMaxTxn`
315 lines
9.8 KiB
Go
315 lines
9.8 KiB
Go
package state
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
memdb "github.com/hashicorp/go-memdb"
|
|
|
|
"github.com/hashicorp/consul/agent/consul/stream"
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
)
|
|
|
|
// Sentinel errors reported by state store operations when a required
// registration or identifier is absent.
var (
	// ErrMissingNode is returned by an operation that requires a node
	// registration when no such registration exists.
	ErrMissingNode = errors.New("Missing node registration")

	// ErrMissingService is returned by an operation that requires a service
	// when no such service exists.
	ErrMissingService = errors.New("Missing service registration")

	// ErrMissingSessionID is returned when a session registration is
	// attempted with an empty session ID.
	ErrMissingSessionID = errors.New("Missing session ID")

	// ErrMissingACLTokenSecret is returned when a token set is called on a
	// token whose SecretID is empty.
	ErrMissingACLTokenSecret = errors.New("Missing ACL Token SecretID")

	// ErrMissingACLTokenAccessor is returned when a token set is called on a
	// token whose AccessorID is empty.
	ErrMissingACLTokenAccessor = errors.New("Missing ACL Token AccessorID")

	// ErrTokenHasNoPrivileges is returned when a token set is called on a
	// token that has no policies, roles, or service identities while the
	// caller requires at least one of them to be set.
	ErrTokenHasNoPrivileges = errors.New("Token has no privileges")

	// ErrMissingACLPolicyID is returned when a policy set is called on a
	// policy whose ID is empty.
	ErrMissingACLPolicyID = errors.New("Missing ACL Policy ID")

	// ErrMissingACLPolicyName is returned when a policy set is called on a
	// policy whose Name is empty.
	ErrMissingACLPolicyName = errors.New("Missing ACL Policy Name")

	// ErrMissingACLRoleID is returned when a role set is called on a role
	// whose ID is empty.
	ErrMissingACLRoleID = errors.New("Missing ACL Role ID")

	// ErrMissingACLRoleName is returned when a role set is called on a role
	// whose Name is empty.
	ErrMissingACLRoleName = errors.New("Missing ACL Role Name")

	// ErrMissingACLBindingRuleID is returned when a binding rule set is
	// called on a binding rule whose ID is empty.
	ErrMissingACLBindingRuleID = errors.New("Missing ACL Binding Rule ID")

	// ErrMissingACLBindingRuleAuthMethod is returned when a binding rule set
	// is called on a binding rule whose AuthMethod is empty.
	ErrMissingACLBindingRuleAuthMethod = errors.New("Missing ACL Binding Rule Auth Method")

	// ErrMissingACLAuthMethodName is returned when an auth method set is
	// called on an auth method whose Name is empty.
	ErrMissingACLAuthMethodName = errors.New("Missing ACL Auth Method Name")

	// ErrMissingACLAuthMethodType is returned when an auth method set is
	// called on an auth method whose Type is empty.
	ErrMissingACLAuthMethodType = errors.New("Missing ACL Auth Method Type")

	// ErrMissingQueryID is returned when a Query set is called on a Query
	// whose ID is empty.
	ErrMissingQueryID = errors.New("Missing Query ID")

	// ErrMissingCARootID is returned when a CARoot set is called with a
	// CARoot whose ID is empty.
	ErrMissingCARootID = errors.New("Missing CA Root ID")

	// ErrMissingIntentionID is returned when an Intention set is called with
	// an Intention whose ID is empty.
	ErrMissingIntentionID = errors.New("Missing Intention ID")
)
|
|
|
|
// watchLimit is a soft cap on the number of watches allowed for a single
// blocking query. Once it is exceeded, a higher-level and less fine-grained
// watch is used instead. No value is perfect given how much deployments and
// workloads differ; this one was recommended by customers with many servers.
// Streaming is expected to arrive soon and should help a lot with blocking
// queries. See https://github.com/hashicorp/consul/pull/7200 and the linked
// issues/PRs for more context.
var watchLimit = 8192
|
|
|
|
// Store is where we store all of Consul's state, including
|
|
// records of node registrations, services, checks, key/value
|
|
// pairs and more. The DB is entirely in-memory and is constructed
|
|
// from the Raft log through the FSM.
|
|
type Store struct {
|
|
schema *memdb.DBSchema
|
|
db *changeTrackerDB
|
|
|
|
// abandonCh is used to signal watchers that this state store has been
|
|
// abandoned (usually during a restore). This is only ever closed.
|
|
abandonCh chan struct{}
|
|
|
|
// TODO: refactor abondonCh to use a context so that both can use the same
|
|
// cancel mechanism.
|
|
stopEventPublisher func()
|
|
|
|
// kvsGraveyard manages tombstones for the key value store.
|
|
kvsGraveyard *Graveyard
|
|
|
|
// lockDelay holds expiration times for locks associated with keys.
|
|
lockDelay *Delay
|
|
}
|
|
|
|
// Snapshot is used to provide a point-in-time snapshot. It
|
|
// works by starting a read transaction against the whole state store.
|
|
type Snapshot struct {
|
|
store *Store
|
|
tx AbortTxn
|
|
lastIndex uint64
|
|
}
|
|
|
|
// Restore is used to efficiently manage restoring a large amount of
|
|
// data to a state store.
|
|
type Restore struct {
|
|
store *Store
|
|
tx *txn
|
|
}
|
|
|
|
// sessionCheck is used to create a many-to-one table such that
|
|
// each check registered by a session can be mapped back to the
|
|
// session table. This is only used internally in the state
|
|
// store and thus it is not exported.
|
|
type sessionCheck struct {
|
|
Node string
|
|
Session string
|
|
|
|
CheckID structs.CheckID
|
|
structs.EnterpriseMeta
|
|
}
|
|
|
|
// NewStateStore creates a new in-memory state storage layer.
|
|
func NewStateStore(gc *TombstoneGC) *Store {
|
|
// Create the in-memory DB.
|
|
schema := newDBSchema()
|
|
db, err := memdb.NewMemDB(schema)
|
|
if err != nil {
|
|
// the only way for NewMemDB to error is if the schema is invalid. The
|
|
// scheme is static and tested to be correct, so any failure here would
|
|
// be a programming error, which should panic.
|
|
panic(fmt.Sprintf("failed to create state store: %v", err))
|
|
}
|
|
s := &Store{
|
|
schema: schema,
|
|
abandonCh: make(chan struct{}),
|
|
kvsGraveyard: NewGraveyard(gc),
|
|
lockDelay: NewDelay(),
|
|
stopEventPublisher: func() {},
|
|
db: &changeTrackerDB{
|
|
db: db,
|
|
publisher: stream.NoOpEventPublisher{},
|
|
processChanges: processDBChanges,
|
|
},
|
|
}
|
|
return s
|
|
}
|
|
|
|
func NewStateStoreWithEventPublisher(gc *TombstoneGC) *Store {
|
|
store := NewStateStore(gc)
|
|
ctx, cancel := context.WithCancel(context.TODO())
|
|
store.stopEventPublisher = cancel
|
|
|
|
pub := stream.NewEventPublisher(newSnapshotHandlers((*readDB)(store.db.db)), 10*time.Second)
|
|
store.db.publisher = pub
|
|
|
|
go pub.Run(ctx)
|
|
return store
|
|
}
|
|
|
|
// EventPublisher returns the stream.EventPublisher used by the Store to
|
|
// publish events.
|
|
func (s *Store) EventPublisher() EventPublisher {
|
|
return s.db.publisher
|
|
}
|
|
|
|
// Snapshot is used to create a point-in-time snapshot of the entire db.
|
|
func (s *Store) Snapshot() *Snapshot {
|
|
tx := s.db.Txn(false)
|
|
|
|
var tables []string
|
|
for table := range s.schema.Tables {
|
|
tables = append(tables, table)
|
|
}
|
|
idx := maxIndexTxn(tx, tables...)
|
|
|
|
return &Snapshot{s, tx, idx}
|
|
}
|
|
|
|
// LastIndex returns that last index that affects the snapshotted data.
|
|
func (s *Snapshot) LastIndex() uint64 {
|
|
return s.lastIndex
|
|
}
|
|
|
|
func (s *Snapshot) Indexes() (memdb.ResultIterator, error) {
|
|
return s.tx.Get(tableIndex, indexID)
|
|
}
|
|
|
|
// IndexRestore is used to restore an index
|
|
func (s *Restore) IndexRestore(idx *IndexEntry) error {
|
|
if err := s.tx.Insert(tableIndex, idx); err != nil {
|
|
return fmt.Errorf("index insert failed: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Close performs cleanup of a state snapshot.
|
|
func (s *Snapshot) Close() {
|
|
s.tx.Abort()
|
|
}
|
|
|
|
// Restore is used to efficiently manage restoring a large amount of data into
|
|
// the state store. It works by doing all the restores inside of a single
|
|
// transaction.
|
|
func (s *Store) Restore() *Restore {
|
|
tx := s.db.WriteTxnRestore()
|
|
return &Restore{s, tx}
|
|
}
|
|
|
|
// Abort abandons the changes made by a restore. This or Commit should always be
|
|
// called.
|
|
func (s *Restore) Abort() {
|
|
s.tx.Abort()
|
|
}
|
|
|
|
// Commit commits the changes made by a restore. This or Abort should always be
|
|
// called.
|
|
func (s *Restore) Commit() error {
|
|
return s.tx.Commit()
|
|
}
|
|
|
|
// AbandonCh returns a channel you can wait on to know if the state store was
|
|
// abandoned.
|
|
func (s *Store) AbandonCh() <-chan struct{} {
|
|
return s.abandonCh
|
|
}
|
|
|
|
// Abandon is used to signal that the given state store has been abandoned.
|
|
// Calling this more than one time will panic.
|
|
func (s *Store) Abandon() {
|
|
s.stopEventPublisher()
|
|
close(s.abandonCh)
|
|
}
|
|
|
|
// maxIndex is a helper used to retrieve the highest known index
|
|
// amongst a set of tables in the db.
|
|
func (s *Store) maxIndex(tables ...string) uint64 {
|
|
tx := s.db.Txn(false)
|
|
defer tx.Abort()
|
|
return maxIndexTxn(tx, tables...)
|
|
}
|
|
|
|
// maxIndexTxn is a helper used to retrieve the highest known index
|
|
// amongst a set of tables in the db.
|
|
func maxIndexTxn(tx ReadTxn, tables ...string) uint64 {
|
|
return maxIndexWatchTxn(tx, nil, tables...)
|
|
}
|
|
|
|
func maxIndexWatchTxn(tx ReadTxn, ws memdb.WatchSet, tables ...string) uint64 {
|
|
var lindex uint64
|
|
for _, table := range tables {
|
|
ch, ti, err := tx.FirstWatch(tableIndex, "id", table)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("unknown index: %s err: %s", table, err))
|
|
}
|
|
if idx, ok := ti.(*IndexEntry); ok && idx.Value > lindex {
|
|
lindex = idx.Value
|
|
}
|
|
ws.Add(ch)
|
|
}
|
|
return lindex
|
|
}
|
|
|
|
// indexUpdateMaxTxn is used when restoring entries and sets the table's index to
|
|
// the given idx only if it's greater than the current index.
|
|
func indexUpdateMaxTxn(tx WriteTxn, idx uint64, table string) error {
|
|
ti, err := tx.First(tableIndex, indexID, table)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to retrieve existing index: %s", err)
|
|
}
|
|
|
|
// if this is an update check the idx
|
|
if ti != nil {
|
|
cur, ok := ti.(*IndexEntry)
|
|
if !ok {
|
|
return fmt.Errorf("failed updating index %T need to be `*IndexEntry`", ti)
|
|
}
|
|
// Stored index is newer, don't insert the index
|
|
if idx <= cur.Value {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
if err := tx.Insert(tableIndex, &IndexEntry{table, idx}); err != nil {
|
|
return fmt.Errorf("failed updating index %s", err)
|
|
}
|
|
return nil
|
|
}
|