ed14061578
* Work on raft backend * Add logstore locally * Add encryptor and unsealable interfaces * Add clustering support to raft * Remove client and handler * Bootstrap raft on init * Cleanup raft logic a bit * More raft work * Work on TLS config * More work on bootstrapping * Fix build * More work on bootstrapping * More bootstrapping work * fix build * Remove consul dep * Fix build * merged oss/master into raft-storage * Work on bootstrapping * Get bootstrapping to work * Clean up FMS and node-id * Update local node ID logic * Cleanup node-id change * Work on snapshotting * Raft: Add remove peer API (#906) * Add remove peer API * Add some comments * Fix existing snapshotting (#909) * Raft get peers API (#912) * Read raft configuration * address review feedback * Use the Leadership Transfer API to step-down the active node (#918) * Raft join and unseal using Shamir keys (#917) * Raft join using shamir * Store AEAD instead of master key * Split the raft join process to answer the challenge after a successful unseal * get the follower to standby state * Make unseal work * minor changes * Some input checks * reuse the shamir seal access instead of new default seal access * refactor joinRaftSendAnswer function * Synchronously send answer in auto-unseal case * Address review feedback * Raft snapshots (#910) * Fix existing snapshotting * implement the noop snapshotting * Add comments and switch log libraries * add some snapshot tests * add snapshot test file * add TODO * More work on raft snapshotting * progress on the ConfigStore strategy * Don't use two buckets * Update the snapshot store logic to hide the file logic * Add more backend tests * Cleanup code a bit * [WIP] Raft recovery (#938) * Add recovery functionality * remove fmt.Printfs * Fix a few fsm bugs * Add max size value for raft backend (#942) * Add max size value for raft backend * Include physical.ErrValueTooLarge in the message * Raft snapshot Take/Restore API (#926) * Inital work on raft snapshot APIs * Always 
redirect snapshot install/download requests * More work on the snapshot APIs * Cleanup code a bit * On restore handle special cases * Use the seal to encrypt the sha sum file * Add sealer mechanism and fix some bugs * Call restore while state lock is held * Send restore cb trigger through raft log * Make error messages nicer * Add test helpers * Add snapshot test * Add shamir unseal test * Add more raft snapshot API tests * Fix locking * Change working to initalize * Add underlying raw object to test cluster core * Move leaderUUID to core * Add raft TLS rotation logic (#950) * Add TLS rotation logic * Cleanup logic a bit * Add/Remove from follower state on add/remove peer * add comments * Update more comments * Update request_forwarding_service.proto * Make sure we populate all nodes in the followerstate obj * Update times * Apply review feedback * Add more raft config setting (#947) * Add performance config setting * Add more config options and fix tests * Test Raft Recovery (#944) * Test raft recovery * Leave out a node during recovery * remove unused struct * Update physical/raft/snapshot_test.go * Update physical/raft/snapshot_test.go * fix vendoring * Switch to new raft interface * Remove unused files * Switch a gogo -> proto instance * Remove unneeded vault dep in go.sum * Update helper/testhelpers/testhelpers.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * Update vault/cluster/cluster.go * track active key within the keyring itself (#6915) * track active key within the keyring itself * lookup and store using the active key ID * update docstring * minor refactor * Small text fixes (#6912) * Update physical/raft/raft.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * review feedback * Move raft logical system into separate file * Update help text a bit * Enforce cluster addr is set and use it for raft bootstrapping * Fix tests * fix http test panic * Pull in latest raft-snapshot library * Add comment
153 lines
3.9 KiB
Go
153 lines
3.9 KiB
Go
package raft
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
)
|
|
|
|
// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
	// Apply is invoked once a log entry is committed.
	// It returns a value which will be made available in the
	// ApplyFuture returned by Raft.Apply method if that
	// method was called on the same Raft node as the FSM.
	Apply(*Log) interface{}

	// Snapshot is used to support log compaction. This call should
	// return an FSMSnapshot which can be used to save a point-in-time
	// snapshot of the FSM. Apply and Snapshot are not called in multiple
	// threads, but Apply will be called concurrently with Persist. This means
	// the FSM should be implemented in a fashion that allows for concurrent
	// updates while a snapshot is happening.
	Snapshot() (FSMSnapshot, error)

	// Restore is used to restore an FSM from a snapshot. It is not called
	// concurrently with any other command. The FSM must discard all previous
	// state before restoring the snapshot.
	Restore(io.ReadCloser) error
}
|
|
|
|
// FSMSnapshot is returned by an FSM in response to a Snapshot
// call. It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
	// Persist should dump all necessary state to the WriteCloser 'sink',
	// and call sink.Close() when finished or call sink.Cancel() on error.
	Persist(sink SnapshotSink) error

	// Release is invoked when we are finished with the snapshot.
	Release()
}
|
|
|
|
// runFSM is a long running goroutine responsible for applying logs
|
|
// to the FSM. This is done async of other logs since we don't want
|
|
// the FSM to block our internal operations.
|
|
func (r *Raft) runFSM() {
|
|
var lastIndex, lastTerm uint64
|
|
|
|
commit := func(req *commitTuple) {
|
|
// Apply the log if a command or config change
|
|
var resp interface{}
|
|
// Make sure we send a response
|
|
defer func() {
|
|
// Invoke the future if given
|
|
if req.future != nil {
|
|
req.future.response = resp
|
|
req.future.respond(nil)
|
|
}
|
|
}()
|
|
|
|
switch req.log.Type {
|
|
case LogCommand:
|
|
start := time.Now()
|
|
resp = r.fsm.Apply(req.log)
|
|
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
|
|
|
|
case LogConfiguration:
|
|
configStore, ok := r.fsm.(ConfigurationStore)
|
|
if !ok {
|
|
// Return early to avoid incrementing the index and term for
|
|
// an unimplemented operation.
|
|
return
|
|
}
|
|
|
|
start := time.Now()
|
|
configStore.StoreConfiguration(req.log.Index, decodeConfiguration(req.log.Data))
|
|
metrics.MeasureSince([]string{"raft", "fsm", "store_config"}, start)
|
|
}
|
|
|
|
// Update the indexes
|
|
lastIndex = req.log.Index
|
|
lastTerm = req.log.Term
|
|
}
|
|
|
|
restore := func(req *restoreFuture) {
|
|
// Open the snapshot
|
|
meta, source, err := r.snapshots.Open(req.ID)
|
|
if err != nil {
|
|
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
|
|
return
|
|
}
|
|
|
|
// Attempt to restore
|
|
start := time.Now()
|
|
if err := r.fsm.Restore(source); err != nil {
|
|
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
|
|
source.Close()
|
|
return
|
|
}
|
|
source.Close()
|
|
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
|
|
|
|
// Update the last index and term
|
|
lastIndex = meta.Index
|
|
lastTerm = meta.Term
|
|
req.respond(nil)
|
|
}
|
|
|
|
snapshot := func(req *reqSnapshotFuture) {
|
|
// Is there something to snapshot?
|
|
if lastIndex == 0 {
|
|
req.respond(ErrNothingNewToSnapshot)
|
|
return
|
|
}
|
|
|
|
// Start a snapshot
|
|
start := time.Now()
|
|
snap, err := r.fsm.Snapshot()
|
|
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
|
|
|
|
// Respond to the request
|
|
req.index = lastIndex
|
|
req.term = lastTerm
|
|
req.snapshot = snap
|
|
req.respond(err)
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case ptr := <-r.fsmMutateCh:
|
|
switch req := ptr.(type) {
|
|
case *commitTuple:
|
|
commit(req)
|
|
|
|
case *restoreFuture:
|
|
restore(req)
|
|
|
|
default:
|
|
panic(fmt.Errorf("bad type passed to fsmMutateCh: %#v", ptr))
|
|
}
|
|
|
|
case req := <-r.fsmSnapshotCh:
|
|
snapshot(req)
|
|
|
|
case <-r.shutdownCh:
|
|
return
|
|
}
|
|
}
|
|
}
|