ed14061578
* Work on raft backend * Add logstore locally * Add encryptor and unsealable interfaces * Add clustering support to raft * Remove client and handler * Bootstrap raft on init * Cleanup raft logic a bit * More raft work * Work on TLS config * More work on bootstrapping * Fix build * More work on bootstrapping * More bootstrapping work * fix build * Remove consul dep * Fix build * merged oss/master into raft-storage * Work on bootstrapping * Get bootstrapping to work * Clean up FMS and node-id * Update local node ID logic * Cleanup node-id change * Work on snapshotting * Raft: Add remove peer API (#906) * Add remove peer API * Add some comments * Fix existing snapshotting (#909) * Raft get peers API (#912) * Read raft configuration * address review feedback * Use the Leadership Transfer API to step-down the active node (#918) * Raft join and unseal using Shamir keys (#917) * Raft join using shamir * Store AEAD instead of master key * Split the raft join process to answer the challenge after a successful unseal * get the follower to standby state * Make unseal work * minor changes * Some input checks * reuse the shamir seal access instead of new default seal access * refactor joinRaftSendAnswer function * Synchronously send answer in auto-unseal case * Address review feedback * Raft snapshots (#910) * Fix existing snapshotting * implement the noop snapshotting * Add comments and switch log libraries * add some snapshot tests * add snapshot test file * add TODO * More work on raft snapshotting * progress on the ConfigStore strategy * Don't use two buckets * Update the snapshot store logic to hide the file logic * Add more backend tests * Cleanup code a bit * [WIP] Raft recovery (#938) * Add recovery functionality * remove fmt.Printfs * Fix a few fsm bugs * Add max size value for raft backend (#942) * Add max size value for raft backend * Include physical.ErrValueTooLarge in the message * Raft snapshot Take/Restore API (#926) * Inital work on raft snapshot APIs * Always redirect snapshot install/download requests * More work on the snapshot APIs * Cleanup code a bit * On restore handle special cases * Use the seal to encrypt the sha sum file * Add sealer mechanism and fix some bugs * Call restore while state lock is held * Send restore cb trigger through raft log * Make error messages nicer * Add test helpers * Add snapshot test * Add shamir unseal test * Add more raft snapshot API tests * Fix locking * Change working to initalize * Add underlying raw object to test cluster core * Move leaderUUID to core * Add raft TLS rotation logic (#950) * Add TLS rotation logic * Cleanup logic a bit * Add/Remove from follower state on add/remove peer * add comments * Update more comments * Update request_forwarding_service.proto * Make sure we populate all nodes in the followerstate obj * Update times * Apply review feedback * Add more raft config setting (#947) * Add performance config setting * Add more config options and fix tests * Test Raft Recovery (#944) * Test raft recovery * Leave out a node during recovery * remove unused struct * Update physical/raft/snapshot_test.go * Update physical/raft/snapshot_test.go * fix vendoring * Switch to new raft interface * Remove unused files * Switch a gogo -> proto instance * Remove unneeded vault dep in go.sum * Update helper/testhelpers/testhelpers.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * Update vault/cluster/cluster.go * track active key within the keyring itself (#6915) * track active key within the keyring itself * lookup and store using the active key ID * update docstring * minor refactor * Small text fixes (#6912) * Update physical/raft/raft.go Co-Authored-By: Calvin Leung Huang <cleung2010@gmail.com> * review feedback * Move raft logical system into separate file * Update help text a bit * Enforce cluster addr is set and use it for raft bootstrapping * Fix tests * fix http test panic * Pull in latest raft-snapshot library * Add comment
397 lines
11 KiB
Go
397 lines
11 KiB
Go
package bbolt
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"sort"
|
|
)
|
|
|
|
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
|
|
// Cursors see nested buckets with value == nil.
|
|
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
|
|
//
|
|
// Keys and values returned from the cursor are only valid for the life of the transaction.
|
|
//
|
|
// Changing data while traversing with a cursor may cause it to be invalidated
|
|
// and return unexpected keys and/or values. You must reposition your cursor
|
|
// after mutating data.
|
|
type Cursor struct {
|
|
bucket *Bucket
|
|
stack []elemRef
|
|
}
|
|
|
|
// Bucket returns the bucket that this cursor was created from.
|
|
func (c *Cursor) Bucket() *Bucket {
|
|
return c.bucket
|
|
}
|
|
|
|
// First moves the cursor to the first item in the bucket and returns its key and value.
|
|
// If the bucket is empty then a nil key and value are returned.
|
|
// The returned key and value are only valid for the life of the transaction.
|
|
func (c *Cursor) First() (key []byte, value []byte) {
|
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
c.stack = c.stack[:0]
|
|
p, n := c.bucket.pageNode(c.bucket.root)
|
|
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
|
c.first()
|
|
|
|
// If we land on an empty page then move to the next value.
|
|
// https://github.com/boltdb/bolt/issues/450
|
|
if c.stack[len(c.stack)-1].count() == 0 {
|
|
c.next()
|
|
}
|
|
|
|
k, v, flags := c.keyValue()
|
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
return k, nil
|
|
}
|
|
return k, v
|
|
|
|
}
|
|
|
|
// Last moves the cursor to the last item in the bucket and returns its key and value.
|
|
// If the bucket is empty then a nil key and value are returned.
|
|
// The returned key and value are only valid for the life of the transaction.
|
|
func (c *Cursor) Last() (key []byte, value []byte) {
|
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
c.stack = c.stack[:0]
|
|
p, n := c.bucket.pageNode(c.bucket.root)
|
|
ref := elemRef{page: p, node: n}
|
|
ref.index = ref.count() - 1
|
|
c.stack = append(c.stack, ref)
|
|
c.last()
|
|
k, v, flags := c.keyValue()
|
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
return k, nil
|
|
}
|
|
return k, v
|
|
}
|
|
|
|
// Next moves the cursor to the next item in the bucket and returns its key and value.
|
|
// If the cursor is at the end of the bucket then a nil key and value are returned.
|
|
// The returned key and value are only valid for the life of the transaction.
|
|
func (c *Cursor) Next() (key []byte, value []byte) {
|
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
k, v, flags := c.next()
|
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
return k, nil
|
|
}
|
|
return k, v
|
|
}
|
|
|
|
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
|
|
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
|
|
// The returned key and value are only valid for the life of the transaction.
|
|
func (c *Cursor) Prev() (key []byte, value []byte) {
|
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
|
|
// Attempt to move back one element until we're successful.
|
|
// Move up the stack as we hit the beginning of each page in our stack.
|
|
for i := len(c.stack) - 1; i >= 0; i-- {
|
|
elem := &c.stack[i]
|
|
if elem.index > 0 {
|
|
elem.index--
|
|
break
|
|
}
|
|
c.stack = c.stack[:i]
|
|
}
|
|
|
|
// If we've hit the end then return nil.
|
|
if len(c.stack) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
// Move down the stack to find the last element of the last leaf under this branch.
|
|
c.last()
|
|
k, v, flags := c.keyValue()
|
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
return k, nil
|
|
}
|
|
return k, v
|
|
}
|
|
|
|
// Seek moves the cursor to a given key and returns it.
|
|
// If the key does not exist then the next key is used. If no keys
|
|
// follow, a nil key is returned.
|
|
// The returned key and value are only valid for the life of the transaction.
|
|
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
|
k, v, flags := c.seek(seek)
|
|
|
|
// If we ended up after the last element of a page then move to the next one.
|
|
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
|
|
k, v, flags = c.next()
|
|
}
|
|
|
|
if k == nil {
|
|
return nil, nil
|
|
} else if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
return k, nil
|
|
}
|
|
return k, v
|
|
}
|
|
|
|
// Delete removes the current key/value under the cursor from the bucket.
|
|
// Delete fails if current key/value is a bucket or if the transaction is not writable.
|
|
func (c *Cursor) Delete() error {
|
|
if c.bucket.tx.db == nil {
|
|
return ErrTxClosed
|
|
} else if !c.bucket.Writable() {
|
|
return ErrTxNotWritable
|
|
}
|
|
|
|
key, _, flags := c.keyValue()
|
|
// Return an error if current value is a bucket.
|
|
if (flags & bucketLeafFlag) != 0 {
|
|
return ErrIncompatibleValue
|
|
}
|
|
c.node().del(key)
|
|
|
|
return nil
|
|
}
|
|
|
|
// seek moves the cursor to a given key and returns it.
|
|
// If the key does not exist then the next key is used.
|
|
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
|
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
|
|
// Start from root page/node and traverse to correct page.
|
|
c.stack = c.stack[:0]
|
|
c.search(seek, c.bucket.root)
|
|
|
|
// If this is a bucket then return a nil value.
|
|
return c.keyValue()
|
|
}
|
|
|
|
// first moves the cursor to the first leaf element under the last page in the stack.
|
|
func (c *Cursor) first() {
|
|
for {
|
|
// Exit when we hit a leaf page.
|
|
var ref = &c.stack[len(c.stack)-1]
|
|
if ref.isLeaf() {
|
|
break
|
|
}
|
|
|
|
// Keep adding pages pointing to the first element to the stack.
|
|
var pgid pgid
|
|
if ref.node != nil {
|
|
pgid = ref.node.inodes[ref.index].pgid
|
|
} else {
|
|
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
|
}
|
|
p, n := c.bucket.pageNode(pgid)
|
|
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
|
}
|
|
}
|
|
|
|
// last moves the cursor to the last leaf element under the last page in the stack.
|
|
func (c *Cursor) last() {
|
|
for {
|
|
// Exit when we hit a leaf page.
|
|
ref := &c.stack[len(c.stack)-1]
|
|
if ref.isLeaf() {
|
|
break
|
|
}
|
|
|
|
// Keep adding pages pointing to the last element in the stack.
|
|
var pgid pgid
|
|
if ref.node != nil {
|
|
pgid = ref.node.inodes[ref.index].pgid
|
|
} else {
|
|
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
|
}
|
|
p, n := c.bucket.pageNode(pgid)
|
|
|
|
var nextRef = elemRef{page: p, node: n}
|
|
nextRef.index = nextRef.count() - 1
|
|
c.stack = append(c.stack, nextRef)
|
|
}
|
|
}
|
|
|
|
// next moves to the next leaf element and returns the key and value.
|
|
// If the cursor is at the last leaf element then it stays there and returns nil.
|
|
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
|
|
for {
|
|
// Attempt to move over one element until we're successful.
|
|
// Move up the stack as we hit the end of each page in our stack.
|
|
var i int
|
|
for i = len(c.stack) - 1; i >= 0; i-- {
|
|
elem := &c.stack[i]
|
|
if elem.index < elem.count()-1 {
|
|
elem.index++
|
|
break
|
|
}
|
|
}
|
|
|
|
// If we've hit the root page then stop and return. This will leave the
|
|
// cursor on the last element of the last page.
|
|
if i == -1 {
|
|
return nil, nil, 0
|
|
}
|
|
|
|
// Otherwise start from where we left off in the stack and find the
|
|
// first element of the first leaf page.
|
|
c.stack = c.stack[:i+1]
|
|
c.first()
|
|
|
|
// If this is an empty page then restart and move back up the stack.
|
|
// https://github.com/boltdb/bolt/issues/450
|
|
if c.stack[len(c.stack)-1].count() == 0 {
|
|
continue
|
|
}
|
|
|
|
return c.keyValue()
|
|
}
|
|
}
|
|
|
|
// search recursively performs a binary search against a given page/node until it finds a given key.
|
|
func (c *Cursor) search(key []byte, pgid pgid) {
|
|
p, n := c.bucket.pageNode(pgid)
|
|
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
|
|
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
|
|
}
|
|
e := elemRef{page: p, node: n}
|
|
c.stack = append(c.stack, e)
|
|
|
|
// If we're on a leaf page/node then find the specific node.
|
|
if e.isLeaf() {
|
|
c.nsearch(key)
|
|
return
|
|
}
|
|
|
|
if n != nil {
|
|
c.searchNode(key, n)
|
|
return
|
|
}
|
|
c.searchPage(key, p)
|
|
}
|
|
|
|
func (c *Cursor) searchNode(key []byte, n *node) {
|
|
var exact bool
|
|
index := sort.Search(len(n.inodes), func(i int) bool {
|
|
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
|
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
|
ret := bytes.Compare(n.inodes[i].key, key)
|
|
if ret == 0 {
|
|
exact = true
|
|
}
|
|
return ret != -1
|
|
})
|
|
if !exact && index > 0 {
|
|
index--
|
|
}
|
|
c.stack[len(c.stack)-1].index = index
|
|
|
|
// Recursively search to the next page.
|
|
c.search(key, n.inodes[index].pgid)
|
|
}
|
|
|
|
func (c *Cursor) searchPage(key []byte, p *page) {
|
|
// Binary search for the correct range.
|
|
inodes := p.branchPageElements()
|
|
|
|
var exact bool
|
|
index := sort.Search(int(p.count), func(i int) bool {
|
|
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
|
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
|
ret := bytes.Compare(inodes[i].key(), key)
|
|
if ret == 0 {
|
|
exact = true
|
|
}
|
|
return ret != -1
|
|
})
|
|
if !exact && index > 0 {
|
|
index--
|
|
}
|
|
c.stack[len(c.stack)-1].index = index
|
|
|
|
// Recursively search to the next page.
|
|
c.search(key, inodes[index].pgid)
|
|
}
|
|
|
|
// nsearch searches the leaf node on the top of the stack for a key.
|
|
func (c *Cursor) nsearch(key []byte) {
|
|
e := &c.stack[len(c.stack)-1]
|
|
p, n := e.page, e.node
|
|
|
|
// If we have a node then search its inodes.
|
|
if n != nil {
|
|
index := sort.Search(len(n.inodes), func(i int) bool {
|
|
return bytes.Compare(n.inodes[i].key, key) != -1
|
|
})
|
|
e.index = index
|
|
return
|
|
}
|
|
|
|
// If we have a page then search its leaf elements.
|
|
inodes := p.leafPageElements()
|
|
index := sort.Search(int(p.count), func(i int) bool {
|
|
return bytes.Compare(inodes[i].key(), key) != -1
|
|
})
|
|
e.index = index
|
|
}
|
|
|
|
// keyValue returns the key and value of the current leaf element.
|
|
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
|
|
ref := &c.stack[len(c.stack)-1]
|
|
|
|
// If the cursor is pointing to the end of page/node then return nil.
|
|
if ref.count() == 0 || ref.index >= ref.count() {
|
|
return nil, nil, 0
|
|
}
|
|
|
|
// Retrieve value from node.
|
|
if ref.node != nil {
|
|
inode := &ref.node.inodes[ref.index]
|
|
return inode.key, inode.value, inode.flags
|
|
}
|
|
|
|
// Or retrieve value from page.
|
|
elem := ref.page.leafPageElement(uint16(ref.index))
|
|
return elem.key(), elem.value(), elem.flags
|
|
}
|
|
|
|
// node returns the node that the cursor is currently positioned on.
|
|
func (c *Cursor) node() *node {
|
|
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
|
|
|
|
// If the top of the stack is a leaf node then just return it.
|
|
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
|
|
return ref.node
|
|
}
|
|
|
|
// Start from root and traverse down the hierarchy.
|
|
var n = c.stack[0].node
|
|
if n == nil {
|
|
n = c.bucket.node(c.stack[0].page.id, nil)
|
|
}
|
|
for _, ref := range c.stack[:len(c.stack)-1] {
|
|
_assert(!n.isLeaf, "expected branch node")
|
|
n = n.childAt(int(ref.index))
|
|
}
|
|
_assert(n.isLeaf, "expected leaf node")
|
|
return n
|
|
}
|
|
|
|
// elemRef represents a reference to an element on a given page/node.
|
|
type elemRef struct {
|
|
page *page
|
|
node *node
|
|
index int
|
|
}
|
|
|
|
// isLeaf returns whether the ref is pointing at a leaf page/node.
|
|
func (r *elemRef) isLeaf() bool {
|
|
if r.node != nil {
|
|
return r.node.isLeaf
|
|
}
|
|
return (r.page.flags & leafPageFlag) != 0
|
|
}
|
|
|
|
// count returns the number of inodes or page elements.
|
|
func (r *elemRef) count() int {
|
|
if r.node != nil {
|
|
return len(r.node.inodes)
|
|
}
|
|
return int(r.page.count)
|
|
}
|