Autopilot: Server Stabilization, State and Dead Server Cleanup (#10856)

* k8s doc: update for 0.9.1 and 0.8.0 releases (#10825)

* k8s doc: update for 0.9.1 and 0.8.0 releases

* Update website/content/docs/platform/k8s/helm/configuration.mdx

Co-authored-by: Theron Voran <tvoran@users.noreply.github.com>

* Autopilot initial commit

* Move autopilot related backend implementations to its own file

* Abstract promoter creation

* Add nil check for health

* Add server state oss no-ops

* Config ext stub for oss

* Make way for non-voters

* s/health/state

* s/ReadReplica/NonVoter

* Add synopsis and description

* Remove struct tags from AutopilotConfig

* Use var for config storage path

* Handle nil config when reading

* Enable testing autopilot by using inmem cluster

* First passing test

* Only report the server as known if it is present in raft config

* Autopilot defaults to on for all existing and new clusters

* Add locking to some functions

* Persist initial config

* Clarify the command usage doc

* Add health metric for each node

* Fix audit logging issue

* Don't set DisablePerformanceStandby to true in test

* Use node id label for health metric

* Log updates to autopilot config

* Less aggressively consume config loading failures

* Return a mutable config

* Return early from known servers if the raft config can't be pulled

* Update metrics name

* Reduce log level for potentially noisy log

* Add knob to disable autopilot

* Don't persist if default config is in use

* Autopilot: Dead server cleanup (#10857)

* Dead server cleanup

* Initialize channel in any case

* Fix a bunch of tests

* Fix panic

* Add follower locking in heartbeat tracker

* Add LastContactFailureThreshold to config

* Add log when marking node as dead

* Update follower state locking in heartbeat tracker

* Avoid follower states being nil

* Pull test to its own file

* Add execution status to state response

* Optionally enable autopilot in some tests

* Updates

* Added API function to fetch autopilot configuration

* Add test for default autopilot configuration

* Configuration tests

* Add State API test

* Update test

* Added TestClusterOptions.PhysicalFactoryConfig

* Update locking

* Adjust locking in heartbeat tracker

* s/last_contact_failure_threshold/left_server_last_contact_threshold

* Add disabling autopilot as a core config option

* Disable autopilot in some tests

* s/left_server_last_contact_threshold/dead_server_last_contact_threshold

* Set the last heartbeat of followers to now when setting up the active node

* Don't use config defaults from CLI command

* Remove config file support

* Remove HCL test as well

* Persist only supplied config; merge supplied config with default to operate

* Use pointer to structs for storing follower information

* Test update

* Retrieve non-voter status from the config bucket and set it up when a node comes up

* Manage desired suffrage

* Consider bucket being created already

* Move desired suffrage to its own entry

* s/DesiredSuffrageKey/LocalNodeConfigKey

* s/witnessSuffrage/recordSuffrage

* Fix test compilation

* Handle local node config post a snapshot install

* Commit to storage first; then record suffrage in fsm

* No need to handle a nil local node config post snapshot restore

* Reconcile autopilot config when a new leader takes over duty

* Grab fsm lock when recording suffrage

* s/Suffrage/DesiredSuffrage in FollowerState

* Instantiate autopilot only in leader

* Default to old ways in more scenarios

* Make API gracefully handle 404

* Address some feedback

* Make IsDead an atomic.Value

* Simplify follower heartbeat tracking

* Use uber.atomic

* Don't have multiple causes for having autopilot disabled

* Don't remove node from follower states if we fail to remove the dead server

* Autopilot server removals map (#11019)

* Don't remove node from follower states if we fail to remove the dead server

* Use map to track dead server removals

* Use lock and map

* Use delegate lock

* Adjust when to remove entry from map

* Only hold the lock while accessing map

* Fix race

* Don't set default min_quorum

* Fix test

* Ensure follower states is not nil before starting autopilot

* Fix race

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
Co-authored-by: Theron Voran <tvoran@users.noreply.github.com>
Vishal Nayak, 2021-03-03 13:59:50 -05:00, committed by GitHub
parent 9741f51bee
commit 3e55e79a3f
47 changed files with 4762 additions and 281 deletions


@ -2,9 +2,16 @@ package api
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"time"
"github.com/hashicorp/vault/sdk/helper/parseutil"
"github.com/mitchellh/mapstructure"
"github.com/hashicorp/vault/sdk/helper/consts"
)
@ -27,6 +34,77 @@ type RaftJoinRequest struct {
NonVoter bool `json:"non_voter"`
}
// AutopilotConfig is used for querying/setting the Autopilot configuration.
type AutopilotConfig struct {
CleanupDeadServers bool `json:"cleanup_dead_servers" mapstructure:"cleanup_dead_servers"`
LastContactThreshold time.Duration `json:"last_contact_threshold" mapstructure:"-"`
DeadServerLastContactThreshold time.Duration `json:"dead_server_last_contact_threshold" mapstructure:"-"`
MaxTrailingLogs uint64 `json:"max_trailing_logs" mapstructure:"max_trailing_logs"`
MinQuorum uint `json:"min_quorum" mapstructure:"min_quorum"`
ServerStabilizationTime time.Duration `json:"server_stabilization_time" mapstructure:"-"`
}
// UnmarshalJSON parses the autopilot config JSON blob
func (ac *AutopilotConfig) UnmarshalJSON(b []byte) error {
var data interface{}
err := json.Unmarshal(b, &data)
if err != nil {
return err
}
conf := data.(map[string]interface{})
if err = mapstructure.WeakDecode(conf, ac); err != nil {
return err
}
if ac.LastContactThreshold, err = parseutil.ParseDurationSecond(conf["last_contact_threshold"]); err != nil {
return err
}
if ac.DeadServerLastContactThreshold, err = parseutil.ParseDurationSecond(conf["dead_server_last_contact_threshold"]); err != nil {
return err
}
if ac.ServerStabilizationTime, err = parseutil.ParseDurationSecond(conf["server_stabilization_time"]); err != nil {
return err
}
return nil
}
// AutopilotExecutionStatus represents the current status of the autopilot background goroutines
type AutopilotExecutionStatus string
const (
AutopilotNotRunning AutopilotExecutionStatus = "not-running"
AutopilotRunning AutopilotExecutionStatus = "running"
AutopilotShuttingDown AutopilotExecutionStatus = "shutting-down"
)
// AutopilotState represents the response of the raft autopilot state API
type AutopilotState struct {
ExecutionStatus AutopilotExecutionStatus `mapstructure:"execution_status"`
Healthy bool `mapstructure:"healthy"`
FailureTolerance int `mapstructure:"failure_tolerance"`
OptimisticFailureTolerance int `mapstructure:"optimistic_failure_tolerance"`
Servers map[string]*AutopilotServer `mapstructure:"servers"`
Leader string `mapstructure:"leader"`
Voters []string `mapstructure:"voters"`
NonVoters []string `mapstructure:"non_voters"`
}
// AutopilotServer represents the server blocks in the response of the raft
// autopilot state API.
type AutopilotServer struct {
ID string `mapstructure:"id"`
Name string `mapstructure:"name"`
Address string `mapstructure:"address"`
NodeStatus string `mapstructure:"node_status"`
LastContact string `mapstructure:"last_contact"`
LastTerm uint64 `mapstructure:"last_term"`
LastIndex uint64 `mapstructure:"last_index"`
Healthy bool `mapstructure:"healthy"`
StableSince string `mapstructure:"stable_since"`
Status string `mapstructure:"status"`
Meta map[string]string `mapstructure:"meta"`
}
// RaftJoin adds the node from which this call is invoked to the raft
// cluster represented by the leader address in the parameter.
func (c *Sys) RaftJoin(opts *RaftJoinRequest) (*RaftJoinResponse, error) {
@ -160,3 +238,79 @@ func (c *Sys) RaftSnapshotRestore(snapReader io.Reader, force bool) error {
return nil
}
// RaftAutopilotState returns the state of the raft cluster as seen by autopilot.
func (c *Sys) RaftAutopilotState() (*AutopilotState, error) {
r := c.c.NewRequest("GET", "/v1/sys/storage/raft/autopilot/state")
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
resp, err := c.c.RawRequestWithContext(ctx, r)
if resp != nil {
defer resp.Body.Close()
if resp.StatusCode == 404 {
return nil, nil
}
}
if err != nil {
return nil, err
}
secret, err := ParseSecret(resp.Body)
if err != nil {
return nil, err
}
if secret == nil || secret.Data == nil {
return nil, errors.New("data from server response is empty")
}
var result AutopilotState
err = mapstructure.Decode(secret.Data, &result)
if err != nil {
return nil, err
}
return &result, err
}
// RaftAutopilotConfiguration fetches the autopilot config.
func (c *Sys) RaftAutopilotConfiguration() (*AutopilotConfig, error) {
r := c.c.NewRequest("GET", "/v1/sys/storage/raft/autopilot/configuration")
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
resp, err := c.c.RawRequestWithContext(ctx, r)
if resp != nil {
defer resp.Body.Close()
if resp.StatusCode == 404 {
return nil, nil
}
}
if err != nil {
return nil, err
}
secret, err := ParseSecret(resp.Body)
if err != nil {
return nil, err
}
if secret == nil {
return nil, errors.New("data from server response is empty")
}
var result AutopilotConfig
if err = mapstructure.Decode(secret.Data, &result); err != nil {
return nil, err
}
if result.LastContactThreshold, err = parseutil.ParseDurationSecond(secret.Data["last_contact_threshold"]); err != nil {
return nil, err
}
if result.DeadServerLastContactThreshold, err = parseutil.ParseDurationSecond(secret.Data["dead_server_last_contact_threshold"]); err != nil {
return nil, err
}
if result.ServerStabilizationTime, err = parseutil.ParseDurationSecond(secret.Data["server_stabilization_time"]); err != nil {
return nil, err
}
return &result, err
}
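A minimal client-side sketch of the two new methods (hypothetical usage; assumes a reachable Vault using integrated storage, and note that both methods return nil, nil on a 404, so callers must nil-check before dereferencing):

package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/vault/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Autopilot's view of the cluster; nil if the endpoint doesn't exist.
	state, err := client.Sys().RaftAutopilotState()
	if err != nil {
		log.Fatal(err)
	}
	if state != nil {
		fmt.Printf("healthy: %t leader: %s voters: %v\n", state.Healthy, state.Leader, state.Voters)
	}

	// The effective autopilot configuration; nil if the endpoint doesn't exist.
	config, err := client.Sys().RaftAutopilotConfiguration()
	if err != nil {
		log.Fatal(err)
	}
	if config != nil {
		fmt.Printf("cleanup_dead_servers: %t min_quorum: %d\n", config.CleanupDeadServers, config.MinQuorum)
	}
}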


@ -380,9 +380,9 @@ func (c *BaseCommand) flagSet(bit FlagSetBit) *FlagSets {
Target: &c.flagFormat,
Default: "table",
EnvVar: EnvVaultFormat,
Completion: complete.PredictSet("table", "json", "yaml"),
Usage: "Print the output in the given format. Valid formats " +
"are \"table\", \"json\", or \"yaml\".",
Completion: complete.PredictSet("table", "json", "yaml", "pretty"),
Usage: `Print the output in the given format. Valid formats
are "table", "json", "yaml", or "pretty".`,
})
}
}


@ -359,6 +359,21 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) {
BaseCommand: getBaseCommand(),
}, nil
},
"operator raft autopilot get-config": func() (cli.Command, error) {
return &OperatorRaftAutopilotGetConfigCommand{
BaseCommand: getBaseCommand(),
}, nil
},
"operator raft autopilot set-config": func() (cli.Command, error) {
return &OperatorRaftAutopilotSetConfigCommand{
BaseCommand: getBaseCommand(),
}, nil
},
"operator raft autopilot state": func() (cli.Command, error) {
return &OperatorRaftAutopilotStateCommand{
BaseCommand: getBaseCommand(),
}, nil
},
"operator raft list-peers": func() (cli.Command, error) {
return &OperatorRaftListPeersCommand{
BaseCommand: getBaseCommand(),


@ -1,6 +1,7 @@
package command
import (
"bytes"
"encoding/json"
"errors"
"fmt"
@ -64,10 +65,11 @@ type Formatter interface {
}
var Formatters = map[string]Formatter{
"json": JsonFormatter{},
"table": TableFormatter{},
"yaml": YamlFormatter{},
"yml": YamlFormatter{},
"json": JsonFormatter{},
"table": TableFormatter{},
"yaml": YamlFormatter{},
"yml": YamlFormatter{},
"pretty": PrettyFormatter{},
}
func Format(ui cli.Ui) string {
@ -115,6 +117,98 @@ func (y YamlFormatter) Output(ui cli.Ui, secret *api.Secret, data interface{}) e
return err
}
type PrettyFormatter struct{}
func (p PrettyFormatter) Format(data interface{}) ([]byte, error) {
return nil, nil
}
func (p PrettyFormatter) Output(ui cli.Ui, secret *api.Secret, data interface{}) error {
switch data.(type) {
case *api.AutopilotState:
p.OutputAutopilotState(ui, data)
default:
return errors.New("cannot use the pretty formatter for this type")
}
return nil
}
func outputStringSlice(buffer *bytes.Buffer, indent string, values []string) {
for _, val := range values {
buffer.WriteString(fmt.Sprintf("%s%s\n", indent, val))
}
}
type mapOutput struct {
key string
value string
}
func formatServer(srv *api.AutopilotServer) string {
var buffer bytes.Buffer
buffer.WriteString(fmt.Sprintf(" %s\n", srv.ID))
buffer.WriteString(fmt.Sprintf(" Name: %s\n", srv.Name))
buffer.WriteString(fmt.Sprintf(" Address: %s\n", srv.Address))
buffer.WriteString(fmt.Sprintf(" Status: %s\n", srv.Status))
buffer.WriteString(fmt.Sprintf(" Node Status: %s\n", srv.NodeStatus))
buffer.WriteString(fmt.Sprintf(" Healthy: %t\n", srv.Healthy))
buffer.WriteString(fmt.Sprintf(" Last Contact: %s\n", srv.LastContact))
buffer.WriteString(fmt.Sprintf(" Last Term: %d\n", srv.LastTerm))
buffer.WriteString(fmt.Sprintf(" Last Index: %d\n", srv.LastIndex))
if len(srv.Meta) > 0 {
buffer.WriteString(fmt.Sprintf(" Meta\n"))
var outputs []mapOutput
for k, v := range srv.Meta {
outputs = append(outputs, mapOutput{key: k, value: fmt.Sprintf(" %q: %q\n", k, v)})
}
sort.Slice(outputs, func(i, j int) bool {
return outputs[i].key < outputs[j].key
})
for _, output := range outputs {
buffer.WriteString(output.value)
}
}
return buffer.String()
}
func (p PrettyFormatter) OutputAutopilotState(ui cli.Ui, data interface{}) {
state := data.(*api.AutopilotState)
var buffer bytes.Buffer
buffer.WriteString(fmt.Sprintf("Healthy: %t\n", state.Healthy))
buffer.WriteString(fmt.Sprintf("Failure Tolerance: %d\n", state.FailureTolerance))
buffer.WriteString(fmt.Sprintf("Optimistic Failure Tolerance: %d\n", state.OptimisticFailureTolerance))
buffer.WriteString(fmt.Sprintf("Leader: %s\n", state.Leader))
buffer.WriteString("Voters:\n")
outputStringSlice(&buffer, " ", state.Voters)
if len(state.NonVoters) > 0 {
buffer.WriteString("Non Voters:\n")
outputStringSlice(&buffer, " ", state.NonVoters)
}
buffer.WriteString("Servers:\n")
var outputs []mapOutput
for id, srv := range state.Servers {
outputs = append(outputs, mapOutput{key: id, value: formatServer(srv)})
}
sort.Slice(outputs, func(i, j int) bool {
return outputs[i].key < outputs[j].key
})
for _, output := range outputs {
buffer.WriteString(output.value)
}
ui.Output(buffer.String())
}
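// Illustrative shape of the pretty output built above (example values only,
// not real data):
//
//	Healthy: true
//	Failure Tolerance: 1
//	Optimistic Failure Tolerance: 0
//	Leader: node-a
//	Voters:
//	   node-a
//	   node-b
//	Servers:
//	   node-a
//	      Name: node-a
//	      Address: 127.0.0.1:8201
//	      Status: leader
//	      ...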
// An output formatter for table output of an object
type TableFormatter struct{}


@ -0,0 +1,91 @@
package command
import (
"fmt"
"strings"
"github.com/mitchellh/cli"
"github.com/posener/complete"
)
var _ cli.Command = (*OperatorRaftAutopilotGetConfigCommand)(nil)
var _ cli.CommandAutocomplete = (*OperatorRaftAutopilotGetConfigCommand)(nil)
type OperatorRaftAutopilotGetConfigCommand struct {
*BaseCommand
}
func (c *OperatorRaftAutopilotGetConfigCommand) Synopsis() string {
return "Returns the configuration of the autopilot subsystem under integrated storage"
}
func (c *OperatorRaftAutopilotGetConfigCommand) Help() string {
helpText := `
Usage: vault operator raft autopilot get-config
Returns the configuration of the autopilot subsystem under integrated storage.
` + c.Flags().Help()
return strings.TrimSpace(helpText)
}
func (c *OperatorRaftAutopilotGetConfigCommand) Flags() *FlagSets {
set := c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
return set
}
func (c *OperatorRaftAutopilotGetConfigCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictAnything
}
func (c *OperatorRaftAutopilotGetConfigCommand) AutocompleteFlags() complete.Flags {
return c.Flags().Completions()
}
func (c *OperatorRaftAutopilotGetConfigCommand) Run(args []string) int {
f := c.Flags()
if err := f.Parse(args); err != nil {
c.UI.Error(err.Error())
return 1
}
args = f.Args()
switch len(args) {
case 0:
default:
c.UI.Error(fmt.Sprintf("Incorrect arguments (expected 0, got %d)", len(args)))
return 1
}
client, err := c.Client()
if err != nil {
c.UI.Error(err.Error())
return 2
}
config, err := client.Sys().RaftAutopilotConfiguration()
if err != nil {
c.UI.Error(err.Error())
return 2
}
if config == nil {
return 0
}
if Format(c.UI) != "table" {
return OutputData(c.UI, config)
}
entries := []string{"Key | Value"}
entries = append(entries, fmt.Sprintf("%s | %t", "Cleanup Dead Servers", config.CleanupDeadServers))
entries = append(entries, fmt.Sprintf("%s | %s", "Last Contact Threshold", config.LastContactThreshold.String()))
entries = append(entries, fmt.Sprintf("%s | %s", "Dead Server Last Contact Threshold", config.DeadServerLastContactThreshold.String()))
entries = append(entries, fmt.Sprintf("%s | %s", "Server Stabilization Time", config.ServerStabilizationTime.String()))
entries = append(entries, fmt.Sprintf("%s | %d", "Min Quorum", config.MinQuorum))
entries = append(entries, fmt.Sprintf("%s | %d", "Max Trailing Logs", config.MaxTrailingLogs))
return OutputData(c.UI, entries)
}
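Example invocations (hypothetical): "vault operator raft autopilot get-config" prints the key/value table built above, while "vault operator raft autopilot get-config -format=json" emits the configuration via OutputData instead.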


@ -0,0 +1,137 @@
package command
import (
"fmt"
"strings"
"time"
"github.com/mitchellh/cli"
"github.com/posener/complete"
)
var _ cli.Command = (*OperatorRaftAutopilotSetConfigCommand)(nil)
var _ cli.CommandAutocomplete = (*OperatorRaftAutopilotSetConfigCommand)(nil)
type OperatorRaftAutopilotSetConfigCommand struct {
*BaseCommand
flagCleanupDeadServers BoolPtr
flagLastContactThreshold time.Duration
flagDeadServerLastContactThreshold time.Duration
flagMaxTrailingLogs uint64
flagMinQuorum uint
flagServerStabilizationTime time.Duration
}
func (c *OperatorRaftAutopilotSetConfigCommand) Synopsis() string {
return "Modify the configuration of the autopilot subsystem under integrated storage"
}
func (c *OperatorRaftAutopilotSetConfigCommand) Help() string {
helpText := `
Usage: vault operator raft autopilot set-config [options]
Modify the configuration of the autopilot subsystem under integrated storage.
` + c.Flags().Help()
return strings.TrimSpace(helpText)
}
func (c *OperatorRaftAutopilotSetConfigCommand) Flags() *FlagSets {
set := c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
f := set.NewFlagSet("Common Options")
f.BoolPtrVar(&BoolPtrVar{
Name: "cleanup-dead-servers",
Target: &c.flagCleanupDeadServers,
})
f.DurationVar(&DurationVar{
Name: "last-contact-threshold",
Target: &c.flagLastContactThreshold,
})
f.DurationVar(&DurationVar{
Name: "dead-server-last-contact-threshold",
Target: &c.flagDeadServerLastContactThreshold,
})
f.Uint64Var(&Uint64Var{
Name: "max-trailing-logs",
Target: &c.flagMaxTrailingLogs,
})
f.UintVar(&UintVar{
Name: "min-quorum",
Target: &c.flagMinQuorum,
})
f.DurationVar(&DurationVar{
Name: "server-stabilization-time",
Target: &c.flagServerStabilizationTime,
})
return set
}
func (c *OperatorRaftAutopilotSetConfigCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictAnything
}
func (c *OperatorRaftAutopilotSetConfigCommand) AutocompleteFlags() complete.Flags {
return c.Flags().Completions()
}
func (c *OperatorRaftAutopilotSetConfigCommand) Run(args []string) int {
f := c.Flags()
if err := f.Parse(args); err != nil {
c.UI.Error(err.Error())
return 1
}
args = f.Args()
switch len(args) {
case 0:
default:
c.UI.Error(fmt.Sprintf("Incorrect arguments (expected 0, got %d)", len(args)))
return 1
}
client, err := c.Client()
if err != nil {
c.UI.Error(err.Error())
return 2
}
data := make(map[string]interface{})
if c.flagCleanupDeadServers.IsSet() {
data["cleanup_dead_servers"] = c.flagCleanupDeadServers.Get()
}
if c.flagMaxTrailingLogs > 0 {
data["max_trailing_logs"] = c.flagMaxTrailingLogs
}
if c.flagMinQuorum > 0 {
data["min_quorum"] = c.flagMinQuorum
}
if c.flagLastContactThreshold > 0 {
data["last_contact_threshold"] = c.flagLastContactThreshold.String()
}
if c.flagDeadServerLastContactThreshold > 0 {
data["dead_server_last_contact_threshold"] = c.flagDeadServerLastContactThreshold.String()
}
if c.flagServerStabilizationTime > 0 {
data["server_stabilization_time"] = c.flagServerStabilizationTime.String()
}
secret, err := client.Logical().Write("sys/storage/raft/autopilot/configuration", data)
if err != nil {
c.UI.Error(err.Error())
return 2
}
if secret == nil {
return 0
}
return OutputSecret(c.UI, secret)
}
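Example invocation (hypothetical; the flags are those registered in Flags() above): "vault operator raft autopilot set-config -cleanup-dead-servers=true -min-quorum=3 -dead-server-last-contact-threshold=10m". Only explicitly supplied flags are written to the endpoint, so unset fields keep their current server-side values.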


@ -0,0 +1,92 @@
package command
import (
"flag"
"fmt"
"strings"
"github.com/mitchellh/cli"
"github.com/posener/complete"
)
var _ cli.Command = (*OperatorRaftAutopilotStateCommand)(nil)
var _ cli.CommandAutocomplete = (*OperatorRaftAutopilotStateCommand)(nil)
type OperatorRaftAutopilotStateCommand struct {
*BaseCommand
}
func (c *OperatorRaftAutopilotStateCommand) Synopsis() string {
return "Displays the state of the raft cluster under integrated storage as seen by autopilot"
}
func (c *OperatorRaftAutopilotStateCommand) Help() string {
helpText := `
Usage: vault operator raft autopilot state
Displays the state of the raft cluster under integrated storage as seen by autopilot.
` + c.Flags().Help()
return strings.TrimSpace(helpText)
}
func (c *OperatorRaftAutopilotStateCommand) Flags() *FlagSets {
set := c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
// The output of the state endpoint contains nested values and is not fit for
// the default "table" display format. Override the default display format to
// "pretty", both in the flag and in the UI.
set.mainSet.VisitAll(func(fl *flag.Flag) {
if fl.Name == "format" {
fl.DefValue = "pretty"
}
})
ui, ok := c.UI.(*VaultUI)
if ok && ui.format == "table" {
ui.format = "pretty"
}
return set
}
func (c *OperatorRaftAutopilotStateCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictAnything
}
func (c *OperatorRaftAutopilotStateCommand) AutocompleteFlags() complete.Flags {
return c.Flags().Completions()
}
func (c *OperatorRaftAutopilotStateCommand) Run(args []string) int {
f := c.Flags()
if err := f.Parse(args); err != nil {
c.UI.Error(err.Error())
return 1
}
args = f.Args()
switch len(args) {
case 0:
default:
c.UI.Error(fmt.Sprintf("Incorrect arguments (expected 0, got %d)", len(args)))
return 1
}
client, err := c.Client()
if err != nil {
c.UI.Error(err.Error())
return 2
}
state, err := client.Sys().RaftAutopilotState()
if err != nil {
c.UI.Error(fmt.Sprintf("Error checking autopilot state: %s", err))
return 2
}
if state == nil {
return 0
}
return OutputData(c.UI, state)
}
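Example invocation (hypothetical): "vault operator raft autopilot state". Per the Flags() override above, the output defaults to the "pretty" format; pass -format=json to get the raw state instead.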


@ -895,3 +895,36 @@ func testLoadConfigFileLeaseMetrics(t *testing.T) {
t.Fatal(diff)
}
}
func testConfigRaftAutopilot(t *testing.T) {
config, err := LoadConfigFile("./test-fixtures/raft_autopilot.hcl")
if err != nil {
t.Fatal(err)
}
autopilotConfig := `[{"cleanup_dead_servers":true,"last_contact_threshold":"500ms","max_trailing_logs":250,"min_quorum":3,"server_stabilization_time":"10s"}]`
expected := &Config{
SharedConfig: &configutil.SharedConfig{
Listeners: []*configutil.Listener{
{
Type: "tcp",
Address: "127.0.0.1:8200",
},
},
DisableMlock: true,
},
Storage: &Storage{
Type: "raft",
Config: map[string]string{
"path": "/storage/path/raft",
"node_id": "raft1",
"autopilot": autopilotConfig,
},
},
}
config.Listeners[0].RawConfig = nil
if diff := deep.Equal(config, expected); diff != nil {
t.Fatal(diff)
}
}
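The fixture itself is not part of this diff; what follows is a plausible sketch of test-fixtures/raft_autopilot.hcl, inferred from the expected Config above (the nested autopilot block is what the HCL parser flattens into the JSON string assigned to autopilotConfig):

disable_mlock = true

listener "tcp" {
  address = "127.0.0.1:8200"
}

storage "raft" {
  path    = "/storage/path/raft"
  node_id = "raft1"

  autopilot {
    cleanup_dead_servers      = true
    last_contact_threshold    = "500ms"
    max_trailing_logs         = 250
    min_quorum                = 3
    server_stabilization_time = "10s"
  }
}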

go.mod

@ -74,7 +74,8 @@ require (
github.com/hashicorp/golang-lru v0.5.4
github.com/hashicorp/hcl v1.0.1-vault
github.com/hashicorp/nomad/api v0.0.0-20191220223628-edc62acd919d
github.com/hashicorp/raft v1.1.3-0.20201002073007-f367681f9c48
github.com/hashicorp/raft v1.2.0
github.com/hashicorp/raft-autopilot v0.1.2
github.com/hashicorp/raft-snapshot v1.0.3
github.com/hashicorp/serf v0.9.5 // indirect
github.com/hashicorp/vault-plugin-auth-alicloud v0.7.0
@ -155,6 +156,7 @@ require (
go.etcd.io/etcd v0.5.0-alpha.5.0.20200425165423-262c93980547
go.mongodb.org/mongo-driver v1.4.6
go.uber.org/atomic v1.6.0
go.uber.org/zap v1.14.1 // indirect
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d

go.sum

@ -652,6 +652,10 @@ github.com/hashicorp/raft v1.0.1/go.mod h1:DVSAWItjLjTOkVbSpWQ0j0kUADIvDaCtBxIcb
github.com/hashicorp/raft v1.1.2-0.20191002163536-9c6bd3e3eb17/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.1.3-0.20201002073007-f367681f9c48 h1:TpaG+HAdfQyreWNaxIlMU6myVKo2ciBDFdRyc+Z90OI=
github.com/hashicorp/raft v1.1.3-0.20201002073007-f367681f9c48/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.2.0 h1:mHzHIrF0S91d3A7RPBvuqkgB4d/7oFJZyvf1Q4m7GA0=
github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft-autopilot v0.1.2 h1:yeqdUjWLjVJkBM+mcVxqwxi+w+aHsb9cEON2dz69OCs=
github.com/hashicorp/raft-autopilot v0.1.2/go.mod h1:Af4jZBwaNOI+tXfIqIdbcAnh/UyyqIMj/pOISIfhArw=
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=
github.com/hashicorp/raft-snapshot v1.0.3 h1:lTgBBGMFcuKBTwHqWZ4r0TLzNsqo/OByCga/kM6F0uM=
github.com/hashicorp/raft-snapshot v1.0.3/go.mod h1:5sL9eUn72lH5DzsFIJ9jaysITbHksSSszImWSOTC8Ic=
@ -1214,6 +1218,7 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4=
go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
@ -1463,6 +1468,7 @@ golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=


@ -79,11 +79,7 @@ func MakeFileBackend(t testing.T, logger hclog.Logger) *vault.PhysicalBackendBun
}
}
func MakeRaftBackend(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
return MakeRaftBackendWithConf(t, coreIdx, logger, nil)
}
func MakeRaftBackendWithConf(t testing.T, coreIdx int, logger hclog.Logger, extraConf map[string]string) *vault.PhysicalBackendBundle {
func MakeRaftBackend(t testing.T, coreIdx int, logger hclog.Logger, extraConf map[string]interface{}) *vault.PhysicalBackendBundle {
nodeID := fmt.Sprintf("core-%d", coreIdx)
raftDir, err := ioutil.TempDir("", "vault-raft-")
if err != nil {
@ -102,7 +98,10 @@ func MakeRaftBackendWithConf(t testing.T, coreIdx int, logger hclog.Logger, extr
"performance_multiplier": "8",
}
for k, v := range extraConf {
conf[k] = v
val, ok := v.(string)
if ok {
conf[k] = val
}
}
backend, err := raft.NewRaftBackend(conf, logger.Named("raft"))
@ -120,11 +119,11 @@ func MakeRaftBackendWithConf(t testing.T, coreIdx int, logger hclog.Logger, extr
// RaftHAFactory returns a PhysicalBackendBundle with raft set as the HABackend
// and the physical.Backend provided in PhysicalBackendBundler as the storage
// backend.
func RaftHAFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
return func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
func RaftHAFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
return func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
// Call the factory func to create the storage backend
physFactory := SharedPhysicalFactory(f)
bundle := physFactory(t, coreIdx, logger)
bundle := physFactory(t, coreIdx, logger, nil)
// This can happen if a shared physical backend is called on a non-0th core.
if bundle == nil {
@ -137,14 +136,14 @@ func RaftHAFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logg
}
nodeID := fmt.Sprintf("core-%d", coreIdx)
conf := map[string]string{
backendConf := map[string]string{
"path": raftDir,
"node_id": nodeID,
"performance_multiplier": "8",
}
// Create and set the HA Backend
raftBackend, err := raft.NewRaftBackend(conf, logger)
raftBackend, err := raft.NewRaftBackend(backendConf, logger)
if err != nil {
bundle.Cleanup()
t.Fatal(err)
@ -166,8 +165,8 @@ func RaftHAFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logg
type PhysicalBackendBundler func(t testing.T, logger hclog.Logger) *vault.PhysicalBackendBundle
func SharedPhysicalFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
return func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
func SharedPhysicalFactory(f PhysicalBackendBundler) func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
return func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
if coreIdx == 0 {
return f(t, logger)
}


@ -44,7 +44,7 @@ func MakeReusableStorage(t testing.T, logger hclog.Logger, bundle *vault.Physica
IsRaft: false,
Setup: func(conf *vault.CoreConfig, opts *vault.TestClusterOptions) {
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
if coreIdx == 0 {
// We intentionally do not clone the backend's Cleanup func,
// because we don't want it to be run until the entire test has
@ -86,7 +86,7 @@ func MakeReusableRaftStorage(t testing.T, logger hclog.Logger, numCores int, add
Setup: func(conf *vault.CoreConfig, opts *vault.TestClusterOptions) {
conf.DisablePerformanceStandby = true
opts.KeepStandbysSealed = true
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
return makeReusableRaftBackend(t, coreIdx, logger, raftDirs[coreIdx], addressProvider, false)
}
},
@ -125,7 +125,7 @@ func MakeReusableRaftHAStorage(t testing.T, logger hclog.Logger, numCores int, b
storage := ReusableStorage{
Setup: func(conf *vault.CoreConfig, opts *vault.TestClusterOptions) {
opts.KeepStandbysSealed = true
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger) *vault.PhysicalBackendBundle {
opts.PhysicalFactory = func(t testing.T, coreIdx int, logger hclog.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
haBundle := makeReusableRaftBackend(t, coreIdx, logger, raftDirs[coreIdx], nil, true)
return &vault.PhysicalBackendBundle{


@ -38,10 +38,11 @@ const (
var (
// dataBucketName is the value we use for the bucket
dataBucketName = []byte("data")
configBucketName = []byte("config")
latestIndexKey = []byte("latest_indexes")
latestConfigKey = []byte("latest_config")
dataBucketName = []byte("data")
configBucketName = []byte("config")
latestIndexKey = []byte("latest_indexes")
latestConfigKey = []byte("latest_config")
localNodeConfigKey = []byte("local_node_config")
)
// Verify FSM satisfies the correct interfaces
@ -86,10 +87,13 @@ type FSM struct {
restoreCb restoreCallback
chunker *raftchunking.ChunkingBatchingFSM
localID string
desiredSuffrage string
}
// NewFSM constructs a FSM using the given directory
func NewFSM(path string, logger log.Logger) (*FSM, error) {
func NewFSM(path string, localID string, logger log.Logger) (*FSM, error) {
// Initialize the latest term, index, and config values
latestTerm := new(uint64)
@ -106,6 +110,11 @@ func NewFSM(path string, logger log.Logger) (*FSM, error) {
latestTerm: latestTerm,
latestIndex: latestIndex,
latestConfig: latestConfig,
// Assume that the default intent is to join as a voter. This will be updated
// when this node joins a cluster with a different suffrage, or during cluster
// setup if this is already part of a cluster with a desired suffrage.
desiredSuffrage: "voter",
localID: localID,
}
f.chunker = raftchunking.NewChunkingBatchingFSM(f, &FSMChunkStorage{
@ -243,6 +252,113 @@ func writeSnapshotMetaToDB(metadata *raft.SnapshotMeta, db *bolt.DB) error {
return nil
}
func (f *FSM) localNodeConfig() (*LocalNodeConfigValue, error) {
var configBytes []byte
if err := f.db.View(func(tx *bolt.Tx) error {
value := tx.Bucket(configBucketName).Get(localNodeConfigKey)
if value != nil {
configBytes = make([]byte, len(value))
copy(configBytes, value)
}
return nil
}); err != nil {
return nil, err
}
if configBytes == nil {
return nil, nil
}
var lnConfig LocalNodeConfigValue
if configBytes != nil {
err := proto.Unmarshal(configBytes, &lnConfig)
if err != nil {
return nil, err
}
f.desiredSuffrage = lnConfig.DesiredSuffrage
return &lnConfig, nil
}
return nil, nil
}
func (f *FSM) upgradeLocalNodeConfig() error {
// Read the local node config
lnConfig, err := f.localNodeConfig()
if err != nil {
return err
}
// Entry is already present. Get the suffrage value.
if lnConfig != nil {
f.desiredSuffrage = lnConfig.DesiredSuffrage
return nil
}
//
// This is the upgrade case where there is no entry
//
lnConfig = &LocalNodeConfigValue{}
// Refer to the persisted latest raft config
config := f.latestConfig.Load().(*ConfigurationValue)
// If there is no config, then this is a fresh node coming up. This could end up
// being a voter or non-voter. But by default assume that this is a voter. It
// will be changed if this node joins the cluster as a non-voter.
if config == nil {
lnConfig.DesiredSuffrage = f.desiredSuffrage
return f.persistDesiredSuffrage(lnConfig)
}
// Get the last known suffrage of the node and assume that it is the desired
// suffrage. There is no better alternative here.
for _, srv := range config.Servers {
if srv.Id == f.localID {
switch srv.Suffrage {
case int32(raft.Nonvoter):
lnConfig.DesiredSuffrage = "non-voter"
default:
lnConfig.DesiredSuffrage = "voter"
}
// Bring the intent to the fsm instance.
f.desiredSuffrage = lnConfig.DesiredSuffrage
break
}
}
return f.persistDesiredSuffrage(lnConfig)
}
// recordSuffrage is called when a node successfully joins the cluster. This
// intent should land in the stored configuration. If the config isn't available
// yet, we still go ahead and store the intent in the fsm. During the next
// update to the configuration, this intent will be persisted.
func (f *FSM) recordSuffrage(desiredSuffrage string) error {
f.l.Lock()
defer f.l.Unlock()
if err := f.persistDesiredSuffrage(&LocalNodeConfigValue{
DesiredSuffrage: desiredSuffrage,
}); err != nil {
return err
}
f.desiredSuffrage = desiredSuffrage
return nil
}
func (f *FSM) persistDesiredSuffrage(lnconfig *LocalNodeConfigValue) error {
dsBytes, err := proto.Marshal(lnconfig)
if err != nil {
return err
}
return f.db.Update(func(tx *bolt.Tx) error {
return tx.Bucket(configBucketName).Put(localNodeConfigKey, dsBytes)
})
}
func (f *FSM) witnessSnapshot(metadata *raft.SnapshotMeta) error {
f.l.RLock()
defer f.l.RUnlock()
@ -645,6 +761,12 @@ func (f *FSM) Restore(r io.ReadCloser) error {
f.l.Lock()
defer f.l.Unlock()
// Cache the local node config before closing the db file
lnConfig, err := f.localNodeConfig()
if err != nil {
return err
}
// Close the db file
if err := f.db.Close(); err != nil {
f.logger.Error("failed to close database file", "error", err)
@ -671,6 +793,16 @@ func (f *FSM) Restore(r io.ReadCloser) error {
retErr = multierror.Append(retErr, errwrap.Wrapf("failed to open new bolt file: {{err}}", err))
}
// Handle local node config restore. lnConfig should not be nil here, but
// adding the nil check anyway, for safety.
if lnConfig != nil {
// Persist the local node config on the restored fsm.
if err := f.persistDesiredSuffrage(lnConfig); err != nil {
f.logger.Error("failed to persist local node config from before the restore", "error", err)
retErr = multierror.Append(retErr, errwrap.Wrapf("failed to persist local node config from before the restore: {{err}}", err))
}
}
return retErr.ErrorOrNil()
}


@ -28,7 +28,7 @@ func getFSM(t testing.TB) (*FSM, string) {
Level: hclog.Trace,
})
fsm, err := NewFSM(raftDir, logger)
fsm, err := NewFSM(raftDir, "", logger)
if err != nil {
t.Fatal(err)
}


@ -21,6 +21,7 @@ import (
"github.com/hashicorp/go-raftchunking"
"github.com/hashicorp/go-uuid"
"github.com/hashicorp/raft"
autopilot "github.com/hashicorp/raft-autopilot"
snapshot "github.com/hashicorp/raft-snapshot"
raftboltdb "github.com/hashicorp/vault/physical/raft/logstore"
"github.com/hashicorp/vault/sdk/helper/consts"
@ -120,6 +121,30 @@ type RaftBackend struct {
// It is suggested to use a value of 2x the Raft chunking size for optimal
// performance.
maxEntrySize uint64
// autopilot is the instance of the raft-autopilot library's implementation of
// the autopilot features. It is instantiated on both leaders and followers;
// however, only the active node runs autopilot.
autopilot *autopilot.Autopilot
// autopilotConfig represents the configuration required to instantiate autopilot.
autopilotConfig *AutopilotConfig
// followerStates represents the information about all the peers of the raft
// leader. This is used to track some state of the peers and as well as used
// to see if the peers are "alive" using the heartbeat received from them.
followerStates *FollowerStates
// followerHeartbeatTicker is used to compute dead servers using follower
// state heartbeats.
followerHeartbeatTicker *time.Ticker
// disableAutopilot, if set, keeps the autopilot implementation from being
// used; the fallback is to interact with the raft instance directly. This can
// only be set at startup via the environment variable
// VAULT_RAFT_AUTOPILOT_DISABLE and can't be updated once the node is up and
// running.
disableAutopilot bool
}
// LeaderJoinInfo contains information required by a node to join itself as a
@ -247,7 +272,6 @@ func EnsurePath(path string, dir bool) error {
// NewRaftBackend constructs a RaftBackend using the given directory
func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend, error) {
path := os.Getenv(EnvVaultRaftPath)
if path == "" {
pathFromConfig, ok := conf["path"]
@ -257,8 +281,50 @@ func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend
path = pathFromConfig
}
var localID string
{
// Determine the local node ID from the environment.
if raftNodeID := os.Getenv(EnvVaultRaftNodeID); raftNodeID != "" {
localID = raftNodeID
}
// If not set in the environment check the configuration file.
if len(localID) == 0 {
localID = conf["node_id"]
}
// If not set in the config check the "node-id" file.
if len(localID) == 0 {
localIDRaw, err := ioutil.ReadFile(filepath.Join(path, "node-id"))
switch {
case err == nil:
if len(localIDRaw) > 0 {
localID = string(localIDRaw)
}
case os.IsNotExist(err):
default:
return nil, err
}
}
// If all of the above fails generate a UUID and persist it to the
// "node-id" file.
if len(localID) == 0 {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
if err := ioutil.WriteFile(filepath.Join(path, "node-id"), []byte(id), 0600); err != nil {
return nil, err
}
localID = id
}
}
// Create the FSM.
fsm, err := NewFSM(path, logger.Named("fsm"))
fsm, err := NewFSM(path, localID, logger.Named("fsm"))
if err != nil {
return nil, fmt.Errorf("failed to create fsm: %v", err)
}
@ -322,48 +388,6 @@ func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend
snap = newSnapshotStoreDelay(snap, delay)
}
var localID string
{
// Determine the local node ID from the environment.
if raftNodeID := os.Getenv(EnvVaultRaftNodeID); raftNodeID != "" {
localID = raftNodeID
}
// If not set in the environment check the configuration file.
if len(localID) == 0 {
localID = conf["node_id"]
}
// If not set in the config check the "node-id" file.
if len(localID) == 0 {
localIDRaw, err := ioutil.ReadFile(filepath.Join(path, "node-id"))
switch {
case err == nil:
if len(localIDRaw) > 0 {
localID = string(localIDRaw)
}
case os.IsNotExist(err):
default:
return nil, err
}
}
// If all of the above fails generate a UUID and persist it to the
// "node-id" file.
if len(localID) == 0 {
id, err := uuid.GenerateUUID()
if err != nil {
return nil, err
}
if err := ioutil.WriteFile(filepath.Join(path, "node-id"), []byte(id), 0600); err != nil {
return nil, err
}
localID = id
}
}
maxEntrySize := defaultMaxEntrySize
if maxEntrySizeCfg := conf["max_entry_size"]; len(maxEntrySizeCfg) != 0 {
i, err := strconv.Atoi(maxEntrySizeCfg)
@ -375,17 +399,18 @@ func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend
}
return &RaftBackend{
logger: logger,
fsm: fsm,
raftInitCh: make(chan struct{}),
conf: conf,
logStore: log,
stableStore: stable,
snapStore: snap,
dataDir: path,
localID: localID,
permitPool: physical.NewPermitPool(physical.DefaultParallelOperations),
maxEntrySize: maxEntrySize,
logger: logger,
fsm: fsm,
raftInitCh: make(chan struct{}),
conf: conf,
logStore: log,
stableStore: stable,
snapStore: snap,
dataDir: path,
localID: localID,
permitPool: physical.NewPermitPool(physical.DefaultParallelOperations),
maxEntrySize: maxEntrySize,
followerHeartbeatTicker: time.NewTicker(time.Second),
}, nil
}
@ -781,6 +806,11 @@ func (b *RaftBackend) SetupCluster(ctx context.Context, opts SetupOpts) error {
b.raft = raftObj
b.raftNotifyCh = raftNotifyCh
if err := b.fsm.upgradeLocalNodeConfig(); err != nil {
b.logger.Error("failed to upgrade local node configuration")
return err
}
if b.streamLayer != nil {
// Add Handler to the cluster.
opts.ClusterListener.AddHandler(consts.RaftStorageALPN, b.streamLayer)
@ -852,19 +882,42 @@ func (b *RaftBackend) AppliedIndex() uint64 {
return indexState.Index
}
// Term returns the raft term of this node.
func (b *RaftBackend) Term() uint64 {
b.l.RLock()
defer b.l.RUnlock()
if b.fsm == nil {
return 0
}
// We use the latest term that the FSM has seen here, which may be behind
// the raft library's notion of the current term due to its async nature.
indexState, _ := b.fsm.LatestState()
return indexState.Term
}
// RemovePeer removes the given peer ID from the raft cluster. If the node is
// ourselves we will give up leadership.
func (b *RaftBackend) RemovePeer(ctx context.Context, peerID string) error {
b.l.RLock()
defer b.l.RUnlock()
if b.raft == nil {
return errors.New("raft storage is not initialized")
if b.disableAutopilot {
if b.raft == nil {
return errors.New("raft storage is not initialized")
}
b.logger.Trace("removing server from raft", "id", peerID)
future := b.raft.RemoveServer(raft.ServerID(peerID), 0, 0)
return future.Error()
}
future := b.raft.RemoveServer(raft.ServerID(peerID), 0, 0)
if b.autopilot == nil {
return errors.New("raft storage autopilot is not initialized")
}
return future.Error()
b.logger.Trace("removing server from raft via autopilot", "id", peerID)
return b.autopilot.RemoveServer(raft.ServerID(peerID))
}
func (b *RaftBackend) GetConfiguration(ctx context.Context) (*RaftConfigurationResponse, error) {
@ -905,14 +958,27 @@ func (b *RaftBackend) AddPeer(ctx context.Context, peerID, clusterAddr string) e
b.l.RLock()
defer b.l.RUnlock()
if b.raft == nil {
return errors.New("raft storage is not initialized")
if b.disableAutopilot {
if b.raft == nil {
return errors.New("raft storage is not initialized")
}
b.logger.Trace("adding server to raft", "id", peerID)
future := b.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(clusterAddr), 0, 0)
return future.Error()
}
b.logger.Debug("adding raft peer", "node_id", peerID, "cluster_addr", clusterAddr)
if b.autopilot == nil {
return errors.New("raft storage autopilot is not initialized")
}
future := b.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(clusterAddr), 0, 0)
return future.Error()
b.logger.Trace("adding server to raft via autopilot", "id", peerID)
return b.autopilot.AddServer(&autopilot.Server{
ID: raft.ServerID(peerID),
Name: peerID,
Address: raft.ServerAddress(clusterAddr),
RaftVersion: raft.ProtocolVersionMax,
NodeType: autopilot.NodeVoter,
})
}
// Peers returns all the servers present in the raft cluster
@ -921,7 +987,7 @@ func (b *RaftBackend) Peers(ctx context.Context) ([]Peer, error) {
defer b.l.RUnlock()
if b.raft == nil {
return nil, errors.New("raft storage backend is not initialized")
return nil, errors.New("raft storage is not initialized")
}
future := b.raft.GetConfiguration()
@ -957,7 +1023,7 @@ func (b *RaftBackend) Snapshot(out io.Writer, access *seal.Access) error {
defer b.l.RUnlock()
if b.raft == nil {
return errors.New("raft storage backend is sealed")
return errors.New("raft storage is sealed")
}
// If we have access to the seal create a sealer object
@ -982,7 +1048,7 @@ func (b *RaftBackend) WriteSnapshotToTemp(in io.ReadCloser, access *seal.Access)
var metadata raft.SnapshotMeta
if b.raft == nil {
return nil, nil, metadata, errors.New("raft storage backend is sealed")
return nil, nil, metadata, errors.New("raft storage is sealed")
}
// If we have access to the seal create a sealer object
@ -1150,7 +1216,7 @@ func (b *RaftBackend) Transaction(ctx context.Context, txns []*physical.TxnEntry
// persisted to the local FSM. Caller should hold the backend's read lock.
func (b *RaftBackend) applyLog(ctx context.Context, command *LogData) error {
if b.raft == nil {
return errors.New("raft storage backend is not initialized")
return errors.New("raft storage is not initialized")
}
commandBytes, err := proto.Marshal(command)
@ -1222,6 +1288,35 @@ func (b *RaftBackend) LockWith(key, value string) (physical.Lock, error) {
}, nil
}
// SetDesiredSuffrage sets a field in the fsm indicating the suffrage intent for
// this node.
func (b *RaftBackend) SetDesiredSuffrage(nonVoter bool) error {
b.l.Lock()
defer b.l.Unlock()
var desiredSuffrage string
switch nonVoter {
case true:
desiredSuffrage = "non-voter"
default:
desiredSuffrage = "voter"
}
err := b.fsm.recordSuffrage(desiredSuffrage)
if err != nil {
return err
}
return nil
}
func (b *RaftBackend) DesiredSuffrage() string {
b.l.RLock()
desiredSuffrage := b.fsm.desiredSuffrage
b.l.RUnlock()
return desiredSuffrage
}
// RaftLock implements the physical Lock interface and enables HA for this
// backend. The Lock uses the raftNotifyCh for receiving leadership edge
// triggers. Vault's active duty matches raft's leadership.
@ -1327,8 +1422,6 @@ func (l *RaftLock) Lock(stopCh <-chan struct{}) (<-chan struct{}, error) {
return nil, nil
}
}
return nil, nil
}
// Unlock gives up leadership.


@ -0,0 +1,711 @@
package raft
import (
"context"
"encoding/json"
"errors"
"fmt"
"math"
"os"
"strconv"
"sync"
"time"
"github.com/hashicorp/vault/sdk/helper/parseutil"
"github.com/hashicorp/vault/sdk/helper/strutil"
"go.uber.org/atomic"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/raft"
autopilot "github.com/hashicorp/raft-autopilot"
"github.com/mitchellh/mapstructure"
)
type CleanupDeadServersValue int
const (
CleanupDeadServersUnset CleanupDeadServersValue = 0
CleanupDeadServersTrue CleanupDeadServersValue = 1
CleanupDeadServersFalse CleanupDeadServersValue = 2
)
func (c CleanupDeadServersValue) Value() bool {
switch c {
case CleanupDeadServersTrue:
return true
default:
return false
}
}
// AutopilotConfig is used for querying/setting the Autopilot configuration.
type AutopilotConfig struct {
// CleanupDeadServers controls whether to remove dead servers from the Raft
// peer list periodically or when a new server joins.
CleanupDeadServers bool `mapstructure:"cleanup_dead_servers"`
// CleanupDeadServersValue is used to shadow the CleanupDeadServers field in
// storage. Having it as an int helps in knowing if the value was set explicitly
// using the API or not.
CleanupDeadServersValue CleanupDeadServersValue `mapstructure:"cleanup_dead_servers_value"`
// LastContactThreshold is the limit on the amount of time a server can go
// without leader contact before being considered unhealthy.
LastContactThreshold time.Duration `mapstructure:"-"`
// DeadServerLastContactThreshold is the limit on the amount of time a server
// can go without leader contact before being considered failed. This takes
// effect only when CleanupDeadServers is set.
DeadServerLastContactThreshold time.Duration `mapstructure:"-"`
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
// be behind before being considered unhealthy.
MaxTrailingLogs uint64 `mapstructure:"max_trailing_logs"`
// MinQuorum sets the minimum number of servers allowed in a cluster before
// autopilot can prune dead servers.
MinQuorum uint `mapstructure:"min_quorum"`
// ServerStabilizationTime is the minimum amount of time a server must be in a
// stable, healthy state before it can be added to the cluster. Only applicable
// with Raft protocol version 3 or higher.
ServerStabilizationTime time.Duration `mapstructure:"-"`
}
// Merge combines the supplied config with the receiver. Supplied ones take
// priority.
func (to *AutopilotConfig) Merge(from *AutopilotConfig) {
if from == nil {
return
}
if from.CleanupDeadServersValue != CleanupDeadServersUnset {
to.CleanupDeadServers = from.CleanupDeadServersValue.Value()
}
if from.MinQuorum != 0 {
to.MinQuorum = from.MinQuorum
}
if from.LastContactThreshold != 0 {
to.LastContactThreshold = from.LastContactThreshold
}
if from.DeadServerLastContactThreshold != 0 {
to.DeadServerLastContactThreshold = from.DeadServerLastContactThreshold
}
if from.MaxTrailingLogs != 0 {
to.MaxTrailingLogs = from.MaxTrailingLogs
}
if from.ServerStabilizationTime != 0 {
to.ServerStabilizationTime = from.ServerStabilizationTime
}
}
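// For example (illustrative): merging a config that only set MinQuorum into a
// clone of the defaults changes MinQuorum and nothing else.
//
//	conf := b.defaultAutopilotConfig()
//	conf.Merge(&AutopilotConfig{MinQuorum: 3})
//	// conf.MinQuorum == 3; all other fields keep their default values.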
// Clone returns a duplicate instance of AutopilotConfig with the exact same values.
func (ac *AutopilotConfig) Clone() *AutopilotConfig {
if ac == nil {
return nil
}
return &AutopilotConfig{
CleanupDeadServers: ac.CleanupDeadServers,
LastContactThreshold: ac.LastContactThreshold,
DeadServerLastContactThreshold: ac.DeadServerLastContactThreshold,
MaxTrailingLogs: ac.MaxTrailingLogs,
MinQuorum: ac.MinQuorum,
ServerStabilizationTime: ac.ServerStabilizationTime,
}
}
// MarshalJSON makes the autopilot config fields JSON compatible
func (ac *AutopilotConfig) MarshalJSON() ([]byte, error) {
return json.Marshal(map[string]interface{}{
"cleanup_dead_servers": ac.CleanupDeadServers,
"cleanup_dead_servers_value": ac.CleanupDeadServersValue,
"last_contact_threshold": ac.LastContactThreshold.String(),
"dead_server_last_contact_threshold": ac.DeadServerLastContactThreshold.String(),
"max_trailing_logs": ac.MaxTrailingLogs,
"min_quorum": ac.MinQuorum,
"server_stabilization_time": ac.ServerStabilizationTime.String(),
})
}
// UnmarshalJSON parses the autopilot config JSON blob
func (ac *AutopilotConfig) UnmarshalJSON(b []byte) error {
var data interface{}
err := json.Unmarshal(b, &data)
if err != nil {
return err
}
conf := data.(map[string]interface{})
if err = mapstructure.WeakDecode(conf, ac); err != nil {
return err
}
if ac.LastContactThreshold, err = parseutil.ParseDurationSecond(conf["last_contact_threshold"]); err != nil {
return err
}
if ac.DeadServerLastContactThreshold, err = parseutil.ParseDurationSecond(conf["dead_server_last_contact_threshold"]); err != nil {
return err
}
if ac.ServerStabilizationTime, err = parseutil.ParseDurationSecond(conf["server_stabilization_time"]); err != nil {
return err
}
return nil
}
// FollowerState represents the information about a peer that the leader tracks.
type FollowerState struct {
AppliedIndex uint64
LastHeartbeat time.Time
LastTerm uint64
IsDead *atomic.Bool
DesiredSuffrage string
}
// FollowerStates holds information about all the followers in the raft cluster
// tracked by the leader.
type FollowerStates struct {
l sync.RWMutex
followers map[string]*FollowerState
}
// NewFollowerStates creates a new FollowerStates object
func NewFollowerStates() *FollowerStates {
return &FollowerStates{
followers: make(map[string]*FollowerState),
}
}
// Update the peer information in the follower states
func (s *FollowerStates) Update(nodeID string, appliedIndex uint64, term uint64, desiredSuffrage string) {
s.l.Lock()
defer s.l.Unlock()
state, ok := s.followers[nodeID]
if !ok {
state = &FollowerState{
IsDead: atomic.NewBool(false),
}
s.followers[nodeID] = state
}
state.AppliedIndex = appliedIndex
state.LastTerm = term
state.DesiredSuffrage = desiredSuffrage
state.LastHeartbeat = time.Now()
}
// Clear wipes all the information regarding peers in the follower states.
func (s *FollowerStates) Clear() {
s.l.Lock()
for i := range s.followers {
delete(s.followers, i)
}
s.l.Unlock()
}
// Delete the entry of a peer represented by the nodeID from follower states.
func (s *FollowerStates) Delete(nodeID string) {
s.l.Lock()
delete(s.followers, nodeID)
s.l.Unlock()
}
// MinIndex returns the minimum raft index applied in the raft cluster.
func (s *FollowerStates) MinIndex() uint64 {
var min uint64 = math.MaxUint64
minFunc := func(a, b uint64) uint64 {
if a > b {
return b
}
return a
}
s.l.RLock()
for _, state := range s.followers {
min = minFunc(min, state.AppliedIndex)
}
s.l.RUnlock()
if min == math.MaxUint64 {
return 0
}
return min
}
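// Example (illustrative leader-side flow): the active node records a heartbeat
// for each follower and can then compute the minimum applied index across the
// cluster.
//
//	states := NewFollowerStates()
//	states.Update("core-1", 120, 3, "voter")
//	states.Update("core-2", 100, 3, "non-voter")
//	min := states.MinIndex() // 100; returns 0 when there are no followers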
// Ensure that the Delegate implements the ApplicationIntegration interface
var _ autopilot.ApplicationIntegration = (*Delegate)(nil)
// Delegate is an implementation of autopilot.ApplicationIntegration interface.
// This is used by the autopilot library to retrieve information and to have
// application specific tasks performed.
type Delegate struct {
*RaftBackend
// dl is a lock dedicated for guarding delegate's fields
dl sync.RWMutex
inflightRemovals map[raft.ServerID]bool
}
func newDelegate(b *RaftBackend) *Delegate {
return &Delegate{
RaftBackend: b,
inflightRemovals: make(map[raft.ServerID]bool),
}
}
// AutopilotConfig is called by the autopilot library to know the desired
// autopilot configuration.
func (d *Delegate) AutopilotConfig() *autopilot.Config {
d.l.RLock()
config := &autopilot.Config{
CleanupDeadServers: d.autopilotConfig.CleanupDeadServers,
LastContactThreshold: d.autopilotConfig.LastContactThreshold,
MaxTrailingLogs: d.autopilotConfig.MaxTrailingLogs,
MinQuorum: d.autopilotConfig.MinQuorum,
ServerStabilizationTime: d.autopilotConfig.ServerStabilizationTime,
Ext: d.autopilotConfigExt(),
}
d.l.RUnlock()
return config
}
// NotifyState is called by the autopilot library whenever there is a state
// change. We update a few metrics when this happens.
func (d *Delegate) NotifyState(state *autopilot.State) {
if d.raft.State() == raft.Leader {
metrics.SetGauge([]string{"autopilot", "failure_tolerance"}, float32(state.FailureTolerance))
if state.Healthy {
metrics.SetGauge([]string{"autopilot", "healthy"}, 1)
} else {
metrics.SetGauge([]string{"autopilot", "healthy"}, 0)
}
for id, state := range state.Servers {
labels := []metrics.Label{
{
Name: "node_id",
Value: string(id),
},
}
if state.Health.Healthy {
metrics.SetGaugeWithLabels([]string{"autopilot", "node", "healthy"}, 1, labels)
} else {
metrics.SetGaugeWithLabels([]string{"autopilot", "node", "healthy"}, 0, labels)
}
}
}
}
// FetchServerStats is called by the autopilot library to retrieve information
// about all the nodes in the raft cluster.
func (d *Delegate) FetchServerStats(ctx context.Context, servers map[raft.ServerID]*autopilot.Server) map[raft.ServerID]*autopilot.ServerStats {
ret := make(map[raft.ServerID]*autopilot.ServerStats)
d.l.RLock()
followerStates := d.followerStates
d.l.RUnlock()
followerStates.l.RLock()
defer followerStates.l.RUnlock()
now := time.Now()
for id, followerState := range followerStates.followers {
ret[raft.ServerID(id)] = &autopilot.ServerStats{
LastContact: now.Sub(followerState.LastHeartbeat),
LastTerm: followerState.LastTerm,
LastIndex: followerState.AppliedIndex,
}
}
leaderState, _ := d.fsm.LatestState()
ret[raft.ServerID(d.localID)] = &autopilot.ServerStats{
LastTerm: leaderState.Term,
LastIndex: leaderState.Index,
}
return ret
}
// KnownServers is called by the autopilot library to know the status of each
// node in the raft cluster. If the application believes that certain nodes
// have left, this is where we let the autopilot library know.
func (d *Delegate) KnownServers() map[raft.ServerID]*autopilot.Server {
d.l.RLock()
defer d.l.RUnlock()
future := d.raft.GetConfiguration()
if err := future.Error(); err != nil {
d.logger.Error("failed to get raft configuration when computing known servers", "error", err)
return nil
}
servers := future.Configuration().Servers
serverIDs := make([]string, 0, len(servers))
for _, server := range servers {
serverIDs = append(serverIDs, string(server.ID))
}
d.followerStates.l.RLock()
defer d.followerStates.l.RUnlock()
ret := make(map[raft.ServerID]*autopilot.Server)
for id, state := range d.followerStates.followers {
// If the server is not in raft configuration, even if we received a follower
// heartbeat, it shouldn't be a known server for autopilot.
if !strutil.StrListContains(serverIDs, id) {
continue
}
server := &autopilot.Server{
ID: raft.ServerID(id),
Name: id,
RaftVersion: raft.ProtocolVersionMax,
Ext: d.autopilotServerExt(state.DesiredSuffrage),
}
switch state.IsDead.Load() {
case true:
d.logger.Debug("informing autopilot that the node left", "id", id)
server.NodeStatus = autopilot.NodeLeft
default:
server.NodeStatus = autopilot.NodeAlive
}
ret[raft.ServerID(id)] = server
}
// Add the leader
ret[raft.ServerID(d.localID)] = &autopilot.Server{
ID: raft.ServerID(d.localID),
Name: d.localID,
RaftVersion: raft.ProtocolVersionMax,
NodeStatus: autopilot.NodeAlive,
Ext: d.autopilotServerExt("voter"),
}
return ret
}
// RemoveFailedServer is called by the autopilot library when it wants a node
// to be removed from the raft configuration. This function removes the node
// from the raft cluster and stops tracking its information in follower states.
// It needs to return quickly, so the removal is performed in a goroutine.
func (d *Delegate) RemoveFailedServer(server *autopilot.Server) {
go func() {
added := false
defer func() {
if added {
d.dl.Lock()
delete(d.inflightRemovals, server.ID)
d.dl.Unlock()
}
}()
d.dl.Lock()
_, ok := d.inflightRemovals[server.ID]
if ok {
d.logger.Info("removal of dead server is already initiated", "id", server.ID)
d.dl.Unlock()
return
}
added = true
d.inflightRemovals[server.ID] = true
d.dl.Unlock()
d.logger.Info("removing dead server from raft configuration", "id", server.ID)
if future := d.raft.RemoveServer(server.ID, 0, 0); future.Error() != nil {
d.logger.Error("failed to remove server", "server_id", server.ID, "server_address", server.Address, "server_name", server.Name, "error", future.Error())
return
}
d.followerStates.Delete(string(server.ID))
}()
}
// SetFollowerStates sets the followerStates field in the backend to track peers
// in the raft cluster.
func (b *RaftBackend) SetFollowerStates(states *FollowerStates) {
b.l.Lock()
b.followerStates = states
b.l.Unlock()
}
// SetAutopilotConfig updates the autopilot configuration in the backend.
func (b *RaftBackend) SetAutopilotConfig(config *AutopilotConfig) {
b.l.Lock()
b.autopilotConfig = config
b.logger.Info("updated autopilot configuration", "config", b.autopilotConfig)
b.l.Unlock()
}
// AutopilotConfig returns the autopilot configuration in the backend.
func (b *RaftBackend) AutopilotConfig() *AutopilotConfig {
b.l.RLock()
defer b.l.RUnlock()
return b.autopilotConfig.Clone()
}
func (b *RaftBackend) defaultAutopilotConfig() *AutopilotConfig {
return &AutopilotConfig{
CleanupDeadServers: false,
LastContactThreshold: 10 * time.Second,
DeadServerLastContactThreshold: 24 * time.Hour,
MaxTrailingLogs: 1000,
ServerStabilizationTime: 10 * time.Second,
}
}
func (b *RaftBackend) AutopilotDisabled() bool {
b.l.RLock()
disabled := b.disableAutopilot
b.l.RUnlock()
return disabled
}
// AutopilotExecutionStatus represents the current status of the autopilot background goroutines
type AutopilotExecutionStatus string
const (
AutopilotNotRunning AutopilotExecutionStatus = "not-running"
AutopilotRunning AutopilotExecutionStatus = "running"
AutopilotShuttingDown AutopilotExecutionStatus = "shutting-down"
)
func autopilotStatusToStatus(status autopilot.ExecutionStatus) AutopilotExecutionStatus {
switch status {
case autopilot.Running:
return AutopilotRunning
case autopilot.ShuttingDown:
return AutopilotShuttingDown
default:
return AutopilotNotRunning
}
}
func (b *RaftBackend) startFollowerHeartbeatTracker() {
b.l.RLock()
tickerCh := b.followerHeartbeatTicker.C
b.l.RUnlock()
for range tickerCh {
b.l.RLock()
if b.autopilotConfig.CleanupDeadServers && b.autopilotConfig.DeadServerLastContactThreshold != 0 {
b.followerStates.l.RLock()
for _, state := range b.followerStates.followers {
if state.LastHeartbeat.IsZero() || state.IsDead.Load() {
continue
}
now := time.Now()
if now.After(state.LastHeartbeat.Add(b.autopilotConfig.DeadServerLastContactThreshold)) {
state.IsDead.Store(true)
}
}
b.followerStates.l.RUnlock()
}
b.l.RUnlock()
}
}
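The marking logic above reduces to a single time comparison; restated as a standalone predicate (isDead is an illustrative name, not part of this change):

// isDead reports whether a follower should be marked dead: it has heartbeated
// at least once, and its last heartbeat is older than the configured
// dead-server threshold.
func isDead(lastHeartbeat time.Time, threshold time.Duration, now time.Time) bool {
    return !lastHeartbeat.IsZero() && now.After(lastHeartbeat.Add(threshold))
}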
// StopAutopilot stops a running autopilot instance. This should only be called
// on the active node.
func (b *RaftBackend) StopAutopilot() {
b.l.Lock()
defer b.l.Unlock()
if b.autopilot == nil {
return
}
b.autopilot.Stop()
b.followerHeartbeatTicker.Stop()
}
// AutopilotState represents the health information retrieved from autopilot.
type AutopilotState struct {
ExecutionStatus AutopilotExecutionStatus `json:"execution_status"`
Healthy bool `json:"healthy"`
FailureTolerance int `json:"failure_tolerance"`
OptimisticFailureTolerance int `json:"optimistic_failure_tolerance"`
Servers map[string]*AutopilotServer `json:"servers"`
Leader string `json:"leader"`
Voters []string `json:"voters"`
NonVoters []string `json:"non_voters,omitempty"`
}
// AutopilotServer represents the health information of an individual server
// node as retrieved from autopilot.
type AutopilotServer struct {
ID string `json:"id"`
Name string `json:"name"`
Address string `json:"address"`
NodeStatus string `json:"node_status"`
LastContact *ReadableDuration `json:"last_contact"`
LastTerm uint64 `json:"last_term"`
LastIndex uint64 `json:"last_index"`
Healthy bool `json:"healthy"`
StableSince time.Time `json:"stable_since"`
Status string `json:"status"`
Meta map[string]string `json:"meta"`
}
// ReadableDuration is a duration type that is serialized to JSON in human-readable format.
type ReadableDuration time.Duration
func NewReadableDuration(dur time.Duration) *ReadableDuration {
d := ReadableDuration(dur)
return &d
}
func (d *ReadableDuration) String() string {
return d.Duration().String()
}
func (d *ReadableDuration) Duration() time.Duration {
if d == nil {
return time.Duration(0)
}
return time.Duration(*d)
}
func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}
func (d *ReadableDuration) UnmarshalJSON(raw []byte) (err error) {
if d == nil {
return fmt.Errorf("cannot unmarshal to nil pointer")
}
var dur time.Duration
str := string(raw)
if len(str) >= 2 && str[0] == '"' && str[len(str)-1] == '"' {
// quoted string
dur, err = time.ParseDuration(str[1 : len(str)-1])
if err != nil {
return err
}
} else {
// no quotes, not a string
v, err := strconv.ParseFloat(str, 64)
if err != nil {
return err
}
dur = time.Duration(v)
}
*d = ReadableDuration(dur)
return nil
}
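A quick round-trip sketch of the custom JSON handling above (package-internal, since ReadableDuration lives in this package):

d := NewReadableDuration(90 * time.Second)
out, _ := d.MarshalJSON() // `"1m30s"`

var parsed ReadableDuration
_ = parsed.UnmarshalJSON([]byte(`"1m30s"`))     // quoted duration string
_ = parsed.UnmarshalJSON([]byte(`90000000000`)) // bare number, taken as nanoseconds
fmt.Println(string(out), parsed.Duration() == 90*time.Second) // "1m30s" true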
func stringIDs(ids []raft.ServerID) []string {
out := make([]string, len(ids))
for i, id := range ids {
out[i] = string(id)
}
return out
}
func autopilotToAPIState(state *autopilot.State) *AutopilotState {
out := &AutopilotState{
Healthy: state.Healthy,
FailureTolerance: state.FailureTolerance,
Leader: string(state.Leader),
Voters: stringIDs(state.Voters),
Servers: make(map[string]*AutopilotServer),
}
for id, srv := range state.Servers {
out.Servers[string(id)] = autopilotToAPIServer(srv)
}
autopilotToAPIStateEnterprise(state, out)
return out
}
func autopilotToAPIServer(srv *autopilot.ServerState) *AutopilotServer {
apiSrv := &AutopilotServer{
ID: string(srv.Server.ID),
Name: srv.Server.Name,
Address: string(srv.Server.Address),
NodeStatus: string(srv.Server.NodeStatus),
LastContact: NewReadableDuration(srv.Stats.LastContact),
LastTerm: srv.Stats.LastTerm,
LastIndex: srv.Stats.LastIndex,
Healthy: srv.Health.Healthy,
StableSince: srv.Health.StableSince,
Status: string(srv.State),
Meta: srv.Server.Meta,
}
autopilotToAPIServerEnterprise(srv, apiSrv)
return apiSrv
}
// GetAutopilotServerState retrieves raft cluster state from autopilot to
// return over the API.
func (b *RaftBackend) GetAutopilotServerState(ctx context.Context) (*AutopilotState, error) {
b.l.RLock()
defer b.l.RUnlock()
if b.raft == nil {
return nil, errors.New("raft storage is not initialized")
}
if b.autopilot == nil {
return nil, nil
}
apState := b.autopilot.GetState()
if apState == nil {
return nil, nil
}
state := autopilotToAPIState(apState)
apStatus, _ := b.autopilot.IsRunning()
state.ExecutionStatus = autopilotStatusToStatus(apStatus)
return state, nil
}
func (b *RaftBackend) DisableAutopilot() {
b.l.Lock()
b.disableAutopilot = true
b.l.Unlock()
}
// SetupAutopilot gathers information required to configure autopilot and starts
// it. If autopilot is disabled, this function does nothing.
func (b *RaftBackend) SetupAutopilot(ctx context.Context, storageConfig *AutopilotConfig, followerStates *FollowerStates, disable bool) {
b.l.Lock()
if disable || os.Getenv("VAULT_RAFT_AUTOPILOT_DISABLE") != "" {
b.disableAutopilot = true
}
if b.disableAutopilot {
b.logger.Info("disabling autopilot")
b.l.Unlock()
return
}
// Start with a default config
b.autopilotConfig = b.defaultAutopilotConfig()
// Merge the setting provided over the API
b.autopilotConfig.Merge(storageConfig)
// Create the autopilot instance
b.autopilot = autopilot.New(b.raft, newDelegate(b), autopilot.WithLogger(b.logger), autopilot.WithPromoter(b.autopilotPromoter()))
b.followerStates = followerStates
b.followerHeartbeatTicker = time.NewTicker(1 * time.Second)
b.l.Unlock()
b.logger.Info("starting autopilot", "config", b.autopilotConfig)
b.autopilot.Start(ctx)
go b.startFollowerHeartbeatTracker()
}
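Taken together, autopilot can be kept off through the CoreConfig plumbing, the backend method, or the environment; a hedged sketch of the knobs this change exposes (the surrounding wiring is illustrative):

os.Setenv("VAULT_RAFT_AUTOPILOT_DISABLE", "1") // environment knob, checked inside SetupAutopilot
b.DisableAutopilot()                           // programmatic knob on the backend
b.SetupAutopilot(context.Background(), nil, raft.NewFollowerStates(), false)
// SetupAutopilot returns early; b.AutopilotDisabled() now reports true and no
// autopilot goroutines are started.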


@ -87,6 +87,8 @@ func getRaftWithDir(t testing.TB, bootstrap bool, noStoreState bool, raftDir str
}
backend.DisableAutopilot()
return backend, raftDir
}


@ -5,11 +5,34 @@ package raft
import (
"context"
"errors"
autopilot "github.com/hashicorp/raft-autopilot"
)
const nonVotersAllowed = false
// AddPeer adds a new server to the raft cluster
func (b *RaftBackend) AddNonVotingPeer(ctx context.Context, peerID, clusterAddr string) error {
return errors.New("not implemented")
}
func (b *RaftBackend) autopilotPromoter() autopilot.Promoter {
return autopilot.DefaultPromoter()
}
// AddNonVotingPeer adds a new server to the raft cluster
func (b *RaftBackend) AddNonVotingPeer(ctx context.Context, peerID, clusterAddr string) error {
return errors.New("adding non voting peer is not allowed")
}
func autopilotToAPIServerEnterprise(_ *autopilot.ServerState, _ *AutopilotServer) {
// noop in oss
}
func autopilotToAPIStateEnterprise(state *autopilot.State, apiState *AutopilotState) {
// Both are the same in OSS
apiState.OptimisticFailureTolerance = state.FailureTolerance
}
func (d *Delegate) autopilotConfigExt() interface{} {
return nil
}
func (d *Delegate) autopilotServerExt(_ string) interface{} {
return nil
}


@ -590,7 +590,7 @@ func TestBoltSnapshotStore_Listing(t *testing.T) {
Level: hclog.Trace,
})
fsm, err := NewFSM(parent, logger)
fsm, err := NewFSM(parent, "", logger)
if err != nil {
t.Fatal(err)
}
@ -655,7 +655,7 @@ func TestBoltSnapshotStore_CreateInstallSnapshot(t *testing.T) {
Level: hclog.Trace,
})
fsm, err := NewFSM(parent, logger)
fsm, err := NewFSM(parent, "", logger)
if err != nil {
t.Fatal(err)
}
@ -753,7 +753,7 @@ func TestBoltSnapshotStore_CreateInstallSnapshot(t *testing.T) {
t.Fatal("expected snapshot installer object")
}
newFSM, err := NewFSM(filepath.Dir(installer.Filename()), logger)
newFSM, err := NewFSM(filepath.Dir(installer.Filename()), "", logger)
if err != nil {
t.Fatal(err)
}
@ -812,7 +812,7 @@ func TestBoltSnapshotStore_CreateInstallSnapshot(t *testing.T) {
// Close/Reopen the db and make sure we still match
fsm.Close()
fsm, err = NewFSM(parent, logger)
fsm, err = NewFSM(parent, "", logger)
if err != nil {
t.Fatal(err)
}


@ -320,6 +320,53 @@ func (x *ConfigurationValue) GetServers() []*Server {
return nil
}
type LocalNodeConfigValue struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
DesiredSuffrage string `protobuf:"bytes,1,opt,name=desired_suffrage,json=desiredSuffrage,proto3" json:"desired_suffrage,omitempty"`
}
func (x *LocalNodeConfigValue) Reset() {
*x = LocalNodeConfigValue{}
if protoimpl.UnsafeEnabled {
mi := &file_physical_raft_types_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *LocalNodeConfigValue) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*LocalNodeConfigValue) ProtoMessage() {}
func (x *LocalNodeConfigValue) ProtoReflect() protoreflect.Message {
mi := &file_physical_raft_types_proto_msgTypes[5]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use LocalNodeConfigValue.ProtoReflect.Descriptor instead.
func (*LocalNodeConfigValue) Descriptor() ([]byte, []int) {
return file_physical_raft_types_proto_rawDescGZIP(), []int{5}
}
func (x *LocalNodeConfigValue) GetDesiredSuffrage() string {
if x != nil {
return x.DesiredSuffrage
}
return ""
}
var File_physical_raft_types_proto protoreflect.FileDescriptor
var file_physical_raft_types_proto_rawDesc = []byte{
@ -349,10 +396,14 @@ var file_physical_raft_types_proto_rawDesc = []byte{
0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x26, 0x0a, 0x07, 0x73,
0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0c, 0x2e, 0x72,
0x61, 0x66, 0x74, 0x2e, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x52, 0x07, 0x73, 0x65, 0x72, 0x76,
0x65, 0x72, 0x73, 0x42, 0x2a, 0x5a, 0x28, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f,
0x6d, 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f, 0x76, 0x61, 0x75, 0x6c,
0x74, 0x2f, 0x70, 0x68, 0x79, 0x73, 0x69, 0x63, 0x61, 0x6c, 0x2f, 0x72, 0x61, 0x66, 0x74, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
0x65, 0x72, 0x73, 0x22, 0x41, 0x0a, 0x14, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x4e, 0x6f, 0x64, 0x65,
0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x64,
0x65, 0x73, 0x69, 0x72, 0x65, 0x64, 0x5f, 0x73, 0x75, 0x66, 0x66, 0x72, 0x61, 0x67, 0x65, 0x18,
0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x64, 0x65, 0x73, 0x69, 0x72, 0x65, 0x64, 0x53, 0x75,
0x66, 0x66, 0x72, 0x61, 0x67, 0x65, 0x42, 0x2a, 0x5a, 0x28, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62,
0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f, 0x76,
0x61, 0x75, 0x6c, 0x74, 0x2f, 0x70, 0x68, 0x79, 0x73, 0x69, 0x63, 0x61, 0x6c, 0x2f, 0x72, 0x61,
0x66, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (
@ -367,13 +418,14 @@ func file_physical_raft_types_proto_rawDescGZIP() []byte {
return file_physical_raft_types_proto_rawDescData
}
var file_physical_raft_types_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
var file_physical_raft_types_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
var file_physical_raft_types_proto_goTypes = []interface{}{
(*LogOperation)(nil), // 0: raft.LogOperation
(*LogData)(nil), // 1: raft.LogData
(*IndexValue)(nil), // 2: raft.IndexValue
(*Server)(nil), // 3: raft.Server
(*ConfigurationValue)(nil), // 4: raft.ConfigurationValue
(*LogOperation)(nil), // 0: raft.LogOperation
(*LogData)(nil), // 1: raft.LogData
(*IndexValue)(nil), // 2: raft.IndexValue
(*Server)(nil), // 3: raft.Server
(*ConfigurationValue)(nil), // 4: raft.ConfigurationValue
(*LocalNodeConfigValue)(nil), // 5: raft.LocalNodeConfigValue
}
var file_physical_raft_types_proto_depIdxs = []int32{
0, // 0: raft.LogData.operations:type_name -> raft.LogOperation
@ -451,6 +503,18 @@ func file_physical_raft_types_proto_init() {
return nil
}
}
file_physical_raft_types_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*LocalNodeConfigValue); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
@ -458,7 +522,7 @@ func file_physical_raft_types_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_physical_raft_types_proto_rawDesc,
NumEnums: 0,
NumMessages: 5,
NumMessages: 6,
NumExtensions: 0,
NumServices: 0,
},


@ -37,3 +37,7 @@ message ConfigurationValue {
uint64 index = 1;
repeated Server servers = 2;
}
message LocalNodeConfigValue {
string desired_suffrage = 1;
}
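A hedged sketch of how this message is meant to be used: the node's desired suffrage is marshaled into a LocalNodeConfigValue and written to the config bucket under LocalNodeConfigKey (error handling elided):

val := &LocalNodeConfigValue{DesiredSuffrage: "non-voter"}
buf, err := proto.Marshal(val) // github.com/golang/protobuf/proto, as imported elsewhere in this change
if err != nil {
    return err
}
// buf is what the fsm persists and restores after a snapshot install.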


@ -26,6 +26,8 @@ import (
"sync/atomic"
"time"
"github.com/hashicorp/vault/physical/raft"
"github.com/armon/go-metrics"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
@ -517,8 +519,8 @@ type Core struct {
// Stores request counters
counters counters
// Stores the raft applied index for standby nodes
raftFollowerStates *raftFollowerStates
// raftFollowerStates tracks information about all the raft follower nodes.
raftFollowerStates *raft.FollowerStates
// Stop channel for raft TLS rotations
raftTLSRotationStopCh chan struct{}
// Stores the pending peers we are waiting to give answers
@ -562,6 +564,9 @@ type Core struct {
numExpirationWorkers int
IndexHeaderHMACKey uberAtomic.Value
// disableAutopilot is used to disable the autopilot subsystem in raft storage
disableAutopilot bool
}
// CoreConfig is used to parameterize a core
@ -667,6 +672,9 @@ type CoreConfig struct {
// number of workers to use for lease revocation in the expiration manager
NumExpirationWorkers int
// DisableAutopilot is used to disable autopilot subsystem in raft storage
DisableAutopilot bool
}
// GetServiceRegistration returns the config's ServiceRegistration, or nil if it does
@ -813,6 +821,8 @@ func NewCore(conf *CoreConfig) (*Core, error) {
activityLogConfig: conf.ActivityLogConfig,
keyRotateGracePeriod: new(int64),
numExpirationWorkers: conf.NumExpirationWorkers,
raftFollowerStates: raft.NewFollowerStates(),
disableAutopilot: conf.DisableAutopilot,
}
c.standbyStopCh.Store(make(chan struct{}))
atomic.StoreUint32(c.sealed, 1)


@ -0,0 +1,211 @@
package rafttests
import (
"context"
"math"
"testing"
"time"
"github.com/hashicorp/vault/api"
"github.com/kr/pretty"
autopilot "github.com/hashicorp/raft-autopilot"
"github.com/stretchr/testify/require"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/vault"
)
func TestRaft_Autopilot_Disable(t *testing.T) {
cluster := raftCluster(t, &RaftClusterOpts{
DisableFollowerJoins: true,
InmemCluster: true,
// Not setting EnableAutopilot here.
})
defer cluster.Cleanup()
client := cluster.Cores[0].Client
state, err := client.Sys().RaftAutopilotState()
require.NoError(t, err)
require.Nil(t, state)
}
func TestRaft_Autopilot_Stabilization_And_State(t *testing.T) {
cluster := raftCluster(t, &RaftClusterOpts{
DisableFollowerJoins: true,
InmemCluster: true,
EnableAutopilot: true,
})
defer cluster.Cleanup()
// Check that autopilot execution state is running
client := cluster.Cores[0].Client
state, err := client.Sys().RaftAutopilotState()
require.NoError(t, err)
require.Equal(t, api.AutopilotRunning, state.ExecutionStatus)
require.Equal(t, true, state.Healthy)
require.Len(t, state.Servers, 1)
require.Equal(t, "core-0", state.Servers["core-0"].ID)
require.Equal(t, "alive", state.Servers["core-0"].NodeStatus)
require.Equal(t, "leader", state.Servers["core-0"].Status)
config, err := client.Sys().RaftAutopilotConfiguration()
require.NoError(t, err)
// Wait for 110% of the stabilization time to add nodes
stabilizationKickOffWaitDuration := time.Duration(math.Ceil(1.1 * float64(config.ServerStabilizationTime)))
time.Sleep(stabilizationKickOffWaitDuration)
joinAndStabilizeFunc := func(core *vault.TestClusterCore, nodeID string, numServers int) {
joinFunc := func(core *vault.TestClusterCore) {
_, err := core.JoinRaftCluster(namespace.RootContext(context.Background()), []*raft.LeaderJoinInfo{
{
LeaderAPIAddr: client.Address(),
TLSConfig: cluster.Cores[0].TLSConfig,
Retry: true,
},
}, false)
require.NoError(t, err)
time.Sleep(1 * time.Second)
cluster.UnsealCore(t, core)
}
joinFunc(core)
state, err = client.Sys().RaftAutopilotState()
require.NoError(t, err)
require.Equal(t, false, state.Healthy)
require.Len(t, state.Servers, numServers)
require.Equal(t, false, state.Servers[nodeID].Healthy)
require.Equal(t, "alive", state.Servers[nodeID].NodeStatus)
require.Equal(t, "non-voter", state.Servers[nodeID].Status)
// Wait till the stabilization period is over
stabilizationWaitDuration := config.ServerStabilizationTime
deadline := time.Now().Add(stabilizationWaitDuration)
healthy := false
for time.Now().Before(deadline) {
state, err := client.Sys().RaftAutopilotState()
require.NoError(t, err)
if state.Healthy {
healthy = true
}
time.Sleep(1 * time.Second)
}
if !healthy {
t.Fatalf("cluster failed to stabilize")
}
// Now that the server is stable, wait for autopilot to reconcile and
// promotion to happen. Reconcile interval is 10 seconds. Bound it by
// doubling.
deadline = time.Now().Add(2 * autopilot.DefaultReconcileInterval)
failed := true
for time.Now().Before(deadline) {
state, err = client.Sys().RaftAutopilotState()
require.NoError(t, err)
if state.Servers[nodeID].Status == "voter" {
failed = false
break
}
time.Sleep(1 * time.Second)
}
if failed {
t.Fatalf("autopilot failed to promote node: id: %#v: state:%# v\n", nodeID, pretty.Formatter(state))
}
}
joinAndStabilizeFunc(cluster.Cores[1], "core-1", 2)
joinAndStabilizeFunc(cluster.Cores[2], "core-2", 3)
state, err = client.Sys().RaftAutopilotState()
require.NoError(t, err)
require.Equal(t, []string{"core-0", "core-1", "core-2"}, state.Voters)
}
func TestRaft_Autopilot_Configuration(t *testing.T) {
cluster := raftCluster(t, &RaftClusterOpts{
DisableFollowerJoins: true,
InmemCluster: true,
EnableAutopilot: true,
})
defer cluster.Cleanup()
client := cluster.Cores[0].Client
configCheckFunc := func(config *api.AutopilotConfig) {
conf, err := client.Sys().RaftAutopilotConfiguration()
require.NoError(t, err)
require.Equal(t, config, conf)
}
writeConfigFunc := func(config map[string]interface{}, expectError bool) {
resp, err := client.Logical().Write("sys/storage/raft/autopilot/configuration", config)
if expectError {
require.Error(t, err)
return
}
require.NoError(t, err)
require.Nil(t, resp)
}
// Ensure autopilot's default config has taken effect
config := &api.AutopilotConfig{
CleanupDeadServers: false,
DeadServerLastContactThreshold: 24 * time.Hour,
LastContactThreshold: 10 * time.Second,
MaxTrailingLogs: 1000,
ServerStabilizationTime: 10 * time.Second,
}
configCheckFunc(config)
// Update config
writableConfig := map[string]interface{}{
"cleanup_dead_servers": true,
"dead_server_last_contact_threshold": "100h",
"last_contact_threshold": "100s",
"max_trailing_logs": 100,
"min_quorum": 100,
"server_stabilization_time": "100s",
}
writeConfigFunc(writableConfig, false)
// Ensure update has taken effect
config.CleanupDeadServers = true
config.DeadServerLastContactThreshold = 100 * time.Hour
config.LastContactThreshold = 100 * time.Second
config.MaxTrailingLogs = 100
config.MinQuorum = 100
config.ServerStabilizationTime = 100 * time.Second
configCheckFunc(config)
// Update some fields and leave the rest as is.
writableConfig = map[string]interface{}{
"dead_server_last_contact_threshold": "50h",
"max_trailing_logs": 50,
"server_stabilization_time": "50s",
}
writeConfigFunc(writableConfig, false)
// Check update
config.DeadServerLastContactThreshold = 50 * time.Hour
config.MaxTrailingLogs = 50
config.ServerStabilizationTime = 50 * time.Second
configCheckFunc(config)
// Check error case
writableConfig = map[string]interface{}{
"min_quorum": 2,
"dead_server_last_contact_threshold": "48h",
}
writeConfigFunc(writableConfig, true)
configCheckFunc(config)
// Ensure that the configuration persists across reboots
leaderCore := cluster.Cores[0]
testhelpers.EnsureCoreSealed(t, cluster.Cores[0])
cluster.UnsealCore(t, leaderCore)
vault.TestWaitActive(t, leaderCore.Core)
configCheckFunc(config)
}


@ -8,6 +8,7 @@ import (
"io/ioutil"
"net/http"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
@ -23,28 +24,60 @@ import (
vaulthttp "github.com/hashicorp/vault/http"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/logging"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault"
vaultcluster "github.com/hashicorp/vault/vault/cluster"
"github.com/stretchr/testify/require"
"golang.org/x/net/http2"
)
func raftCluster(t testing.TB) *vault.TestCluster {
return raftClusterWithPerfStandby(t, false)
}
type RaftClusterOpts struct {
DisableFollowerJoins bool
InmemCluster bool
EnableAutopilot bool
PhysicalFactoryConfig map[string]interface{}
DisablePerfStandby bool
}
func raftClusterWithPerfStandby(t testing.TB, disablePerfStandby bool) *vault.TestCluster {
func raftCluster(t testing.TB, ropts *RaftClusterOpts) *vault.TestCluster {
if ropts == nil {
ropts = &RaftClusterOpts{}
}
conf := &vault.CoreConfig{
CredentialBackends: map[string]logical.Factory{
"userpass": credUserpass.Factory,
},
DisableAutopilot: !ropts.EnableAutopilot,
}
conf.DisablePerformanceStandby = disablePerfStandby
var opts = vault.TestClusterOptions{HandlerFunc: vaulthttp.Handler}
var opts = vault.TestClusterOptions{
HandlerFunc: vaulthttp.Handler,
}
opts.Logger = logging.NewVaultLogger(hclog.Trace).Named(t.Name())
if ropts.InmemCluster {
inmemCluster, err := vaultcluster.NewInmemLayerCluster("inmem-cluster", 3, hclog.New(&hclog.LoggerOptions{
Mutex: &sync.Mutex{},
Level: hclog.Trace,
Name: "inmem-cluster",
}))
if err != nil {
t.Fatal(err)
}
opts.ClusterLayers = inmemCluster
}
opts.PhysicalFactoryConfig = ropts.PhysicalFactoryConfig
conf.DisablePerformanceStandby = ropts.DisablePerfStandby
teststorage.RaftBackendSetup(conf, &opts)
if ropts.DisableFollowerJoins {
opts.SetupFunc = nil
}
cluster := vault.NewTestCluster(t, conf, &opts)
cluster.Start()
vault.TestWaitActive(t, cluster.Cores[0].Core)
@ -230,7 +263,7 @@ func TestRaft_Join(t *testing.T) {
func TestRaft_RemovePeer(t *testing.T) {
t.Parallel()
cluster := raftCluster(t)
cluster := raftCluster(t, nil)
defer cluster.Cleanup()
for i, c := range cluster.Cores {
@ -273,7 +306,7 @@ func TestRaft_RemovePeer(t *testing.T) {
func TestRaft_Configuration(t *testing.T) {
t.Parallel()
cluster := raftCluster(t)
cluster := raftCluster(t, nil)
defer cluster.Cleanup()
for i, c := range cluster.Cores {
@ -320,7 +353,7 @@ func TestRaft_Configuration(t *testing.T) {
func TestRaft_ShamirUnseal(t *testing.T) {
t.Parallel()
cluster := raftCluster(t)
cluster := raftCluster(t, nil)
defer cluster.Cleanup()
for i, c := range cluster.Cores {
@ -332,7 +365,7 @@ func TestRaft_ShamirUnseal(t *testing.T) {
func TestRaft_SnapshotAPI(t *testing.T) {
t.Parallel()
cluster := raftCluster(t)
cluster := raftCluster(t, nil)
defer cluster.Cleanup()
leaderClient := cluster.Cores[0].Client
@ -467,7 +500,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) {
tCaseLocal := tCase
t.Parallel()
cluster := raftClusterWithPerfStandby(t, tCaseLocal.DisablePerfStandby)
cluster := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby})
defer cluster.Cleanup()
leaderClient := cluster.Cores[0].Client
@ -668,7 +701,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
tCaseLocal := tCase
t.Parallel()
cluster := raftClusterWithPerfStandby(t, tCaseLocal.DisablePerfStandby)
cluster := raftCluster(t, &RaftClusterOpts{DisablePerfStandby: tCaseLocal.DisablePerfStandby})
defer cluster.Cleanup()
leaderClient := cluster.Cores[0].Client
@ -855,7 +888,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) {
t.Parallel()
cluster := raftCluster(t)
cluster := raftCluster(t, nil)
defer cluster.Cleanup()
leaderClient := cluster.Cores[0].Client
@ -901,7 +934,7 @@ func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) {
// Cluster 2
{
cluster2 := raftCluster(t)
cluster2 := raftCluster(t, nil)
defer cluster2.Cleanup()
leaderClient := cluster2.Cores[0].Client
@ -948,7 +981,7 @@ func TestRaft_SnapshotAPI_DifferentCluster(t *testing.T) {
}
func BenchmarkRaft_SingleNode(b *testing.B) {
cluster := raftCluster(b)
cluster := raftCluster(b, nil)
defer cluster.Cleanup()
leaderClient := cluster.Cores[0].Client


@ -1,13 +1,14 @@
package sealmigration
import (
"sync/atomic"
"testing"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
"github.com/hashicorp/vault/sdk/helper/logging"
"github.com/hashicorp/vault/vault"
"sync/atomic"
"testing"
)
type testFunc func(t *testing.T, logger hclog.Logger, storage teststorage.ReusableStorage, basePort int)


@ -4,19 +4,20 @@ import (
"context"
"encoding/base64"
"fmt"
"testing"
"time"
"github.com/go-test/deep"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-kms-wrapping"
wrapping "github.com/hashicorp/go-kms-wrapping"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/helper/testhelpers/seal"
sealhelper "github.com/hashicorp/vault/helper/testhelpers/seal"
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
"github.com/hashicorp/vault/http"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/vault"
"testing"
"time"
)
const (
@ -206,6 +207,7 @@ func migrateFromTransitToShamir_Pre14(t *testing.T, logger hclog.Logger, storage
UnwrapSealFunc: sealFunc,
}
storage.Setup(&conf, &opts)
conf.DisableAutopilot = true
cluster := vault.NewTestCluster(t, &conf, &opts)
cluster.Start()
defer func() {
@ -267,7 +269,9 @@ func migrateFromTransitToShamir_Pre14(t *testing.T, logger hclog.Logger, storage
func migrateFromShamirToTransit_Pre14(t *testing.T, logger hclog.Logger, storage teststorage.ReusableStorage, basePort int, tss *sealhelper.TransitSealServer, rootToken string, recoveryKeys [][]byte) func() vault.Seal {
var baseClusterPort = basePort + 10
var conf = vault.CoreConfig{}
var conf = vault.CoreConfig{
DisableAutopilot: true,
}
var opts = vault.TestClusterOptions{
Logger: logger.Named("migrateFromShamirToTransit"),
HandlerFunc: http.Handler,
@ -561,7 +565,9 @@ func initializeShamir(t *testing.T, logger hclog.Logger, storage teststorage.Reu
var baseClusterPort = basePort + 10
// Start the cluster
var conf = vault.CoreConfig{}
var conf = vault.CoreConfig{
DisableAutopilot: true,
}
var opts = vault.TestClusterOptions{
Logger: logger.Named("initializeShamir"),
HandlerFunc: http.Handler,
@ -612,7 +618,9 @@ func runShamir(t *testing.T, logger hclog.Logger, storage teststorage.ReusableSt
var baseClusterPort = basePort + 10
// Start the cluster
var conf = vault.CoreConfig{}
var conf = vault.CoreConfig{
DisableAutopilot: true,
}
var opts = vault.TestClusterOptions{
Logger: logger.Named("runShamir"),
HandlerFunc: http.Handler,
@ -681,7 +689,9 @@ func InitializeTransit(t *testing.T, logger hclog.Logger, storage teststorage.Re
var baseClusterPort = basePort + 10
// Start the cluster
var conf = vault.CoreConfig{}
var conf = vault.CoreConfig{
DisableAutopilot: true,
}
var opts = vault.TestClusterOptions{
Logger: logger.Named("initializeTransit"),
HandlerFunc: http.Handler,
@ -734,7 +744,9 @@ func runAutoseal(t *testing.T, logger hclog.Logger, storage teststorage.Reusable
var baseClusterPort = basePort + 10
// Start the cluster
var conf = vault.CoreConfig{}
var conf = vault.CoreConfig{
DisableAutopilot: true,
}
var opts = vault.TestClusterOptions{
Logger: logger.Named("runTransit"),
HandlerFunc: http.Handler,


@ -5,16 +5,19 @@ import (
"crypto/subtle"
"encoding/base64"
"errors"
"fmt"
"strings"
"time"
"github.com/hashicorp/vault/sdk/framework"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/sdk/physical"
proto "github.com/golang/protobuf/proto"
wrapping "github.com/hashicorp/go-kms-wrapping"
uuid "github.com/hashicorp/go-uuid"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/framework"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/sdk/physical"
)
// raftStoragePaths returns paths for use when raft is the storage mechanism.
@ -145,6 +148,60 @@ func (b *SystemBackend) raftStoragePaths() []*framework.Path {
HelpSynopsis: strings.TrimSpace(sysRaftHelp["raft-snapshot-force"][0]),
HelpDescription: strings.TrimSpace(sysRaftHelp["raft-snapshot-force"][1]),
},
{
Pattern: "storage/raft/autopilot/state",
Operations: map[logical.Operation]framework.OperationHandler{
logical.ReadOperation: &framework.PathOperation{
Callback: b.handleStorageRaftAutopilotState(),
Summary: "Returns the state of the raft cluster under integrated storage as seen by autopilot.",
},
},
HelpSynopsis: strings.TrimSpace(sysRaftHelp["raft-autopilot-state"][0]),
HelpDescription: strings.TrimSpace(sysRaftHelp["raft-autopilot-state"][1]),
},
{
Pattern: "storage/raft/autopilot/configuration",
Fields: map[string]*framework.FieldSchema{
"cleanup_dead_servers": {
Type: framework.TypeBool,
Description: "Controls whether to remove dead servers from the Raft peer list periodically or when a new server joins.",
},
"last_contact_threshold": {
Type: framework.TypeDurationSecond,
Description: "Limit on the amount of time a server can go without leader contact before being considered unhealthy.",
},
"dead_server_last_contact_threshold": {
Type: framework.TypeDurationSecond,
Description: "Limit on the amount of time a server can go without leader contact before being considered failed. This takes effect only when cleanup_dead_servers is set.",
},
"max_trailing_logs": {
Type: framework.TypeInt,
Description: "Amount of entries in the Raft Log that a server can be behind before being considered unhealthy.",
},
"min_quorum": {
Type: framework.TypeInt,
Description: "Minimum number of servers allowed in a cluster before autopilot can prune dead servers. This should at least be 3.",
},
"server_stabilization_time": {
Type: framework.TypeDurationSecond,
Description: "Minimum amount of time a server must be in a stable, healthy state before it can be added to the cluster.",
},
},
Operations: map[logical.Operation]framework.OperationHandler{
logical.ReadOperation: &framework.PathOperation{
Callback: b.handleStorageRaftAutopilotConfigRead(),
},
logical.UpdateOperation: &framework.PathOperation{
Callback: b.handleStorageRaftAutopilotConfigUpdate(),
},
},
HelpSynopsis: strings.TrimSpace(sysRaftHelp["raft-autopilot-configuration"][0]),
HelpDescription: strings.TrimSpace(sysRaftHelp["raft-autopilot-configuration"][1]),
},
}
}
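The tests later in this change drive these endpoints through the Go API client; for reference, a minimal sketch (client setup elided; RaftAutopilotState and the configuration write mirror the test usage below):

state, err := client.Sys().RaftAutopilotState()
if err != nil {
    return err
}
fmt.Println(state.Healthy, state.Voters)

// Partial updates are allowed; unspecified fields keep their current values.
_, err = client.Logical().Write("sys/storage/raft/autopilot/configuration", map[string]interface{}{
    "cleanup_dead_servers":               true,
    "min_quorum":                         3,
    "dead_server_last_contact_threshold": "24h",
})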
@ -184,7 +241,7 @@ func (b *SystemBackend) handleRaftRemovePeerUpdate() framework.OperationFunc {
return nil, err
}
if b.Core.raftFollowerStates != nil {
b.Core.raftFollowerStates.delete(serverID)
b.Core.raftFollowerStates.Delete(serverID)
}
return nil, nil
@ -296,8 +353,16 @@ func (b *SystemBackend) handleRaftBootstrapAnswerWrite() framework.OperationFunc
return nil, err
}
var desiredSuffrage string
switch nonVoter {
case true:
desiredSuffrage = "non-voter"
default:
desiredSuffrage = "voter"
}
if b.Core.raftFollowerStates != nil {
b.Core.raftFollowerStates.update(serverID, 0)
b.Core.raftFollowerStates.Update(serverID, 0, 0, desiredSuffrage)
}
peers, err := raftBackend.Peers(ctx)
@ -335,6 +400,139 @@ func (b *SystemBackend) handleStorageRaftSnapshotRead() framework.OperationFunc
}
}
func (b *SystemBackend) handleStorageRaftAutopilotState() framework.OperationFunc {
return func(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
raftBackend, ok := b.Core.underlyingPhysical.(*raft.RaftBackend)
if !ok {
return logical.ErrorResponse("raft storage is not in use"), logical.ErrInvalidRequest
}
state, err := raftBackend.GetAutopilotServerState(ctx)
if err != nil {
return nil, err
}
if state == nil {
return nil, nil
}
return &logical.Response{
Data: map[string]interface{}{
"execution_status": state.ExecutionStatus,
"healthy": state.Healthy,
"failure_tolerance": state.FailureTolerance,
"optimistic_failure_tolerance": state.OptimisticFailureTolerance,
"servers": state.Servers,
"leader": state.Leader,
"voters": state.Voters,
"non_voters": state.NonVoters,
},
}, nil
}
}
func (b *SystemBackend) handleStorageRaftAutopilotConfigRead() framework.OperationFunc {
return func(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
raftStorage, ok := b.Core.underlyingPhysical.(*raft.RaftBackend)
if !ok {
return logical.ErrorResponse("raft storage is not in use"), logical.ErrInvalidRequest
}
config := raftStorage.AutopilotConfig()
if config == nil {
return nil, nil
}
return &logical.Response{
Data: map[string]interface{}{
"cleanup_dead_servers": config.CleanupDeadServers,
"last_contact_threshold": config.LastContactThreshold.String(),
"dead_server_last_contact_threshold": config.DeadServerLastContactThreshold.String(),
"max_trailing_logs": config.MaxTrailingLogs,
"min_quorum": config.MinQuorum,
"server_stabilization_time": config.ServerStabilizationTime.String(),
},
}, nil
}
}
func (b *SystemBackend) handleStorageRaftAutopilotConfigUpdate() framework.OperationFunc {
return func(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
raftStorage, ok := b.Core.underlyingPhysical.(*raft.RaftBackend)
if !ok {
return logical.ErrorResponse("raft storage is not in use"), logical.ErrInvalidRequest
}
// Read autopilot configuration from storage
config, err := b.Core.loadAutopilotConfiguration(ctx)
if err != nil {
b.logger.Error("failed to load autopilot config from storage when setting up cluster; continuing since autopilot falls back to default config", "error", err)
}
if config == nil {
config = &raft.AutopilotConfig{}
}
persist := false
cleanupDeadServers, ok := d.GetOk("cleanup_dead_servers")
if ok {
if cleanupDeadServers.(bool) {
config.CleanupDeadServersValue = raft.CleanupDeadServersTrue
} else {
config.CleanupDeadServersValue = raft.CleanupDeadServersFalse
}
persist = true
}
lastContactThreshold, ok := d.GetOk("last_contact_threshold")
if ok {
config.LastContactThreshold = time.Duration(lastContactThreshold.(int)) * time.Second
persist = true
}
deadServerLastContactThreshold, ok := d.GetOk("dead_server_last_contact_threshold")
if ok {
config.DeadServerLastContactThreshold = time.Duration(deadServerLastContactThreshold.(int)) * time.Second
persist = true
}
maxTrailingLogs, ok := d.GetOk("max_trailing_logs")
if ok {
config.MaxTrailingLogs = uint64(maxTrailingLogs.(int))
persist = true
}
minQuorum, ok := d.GetOk("min_quorum")
if ok {
config.MinQuorum = uint(minQuorum.(int))
persist = true
}
serverStabilizationTime, ok := d.GetOk("server_stabilization_time")
if ok {
config.ServerStabilizationTime = time.Duration(serverStabilizationTime.(int)) * time.Second
persist = true
}
effectiveConf := raftStorage.AutopilotConfig()
effectiveConf.Merge(config)
if effectiveConf.CleanupDeadServers && effectiveConf.MinQuorum < 3 {
return logical.ErrorResponse(fmt.Sprintf("min_quorum must be set when cleanup_dead_servers is set and it should at least be 3; cleanup_dead_servers: %#v, min_quorum: %#v", effectiveConf.CleanupDeadServers, effectiveConf.MinQuorum)), logical.ErrInvalidRequest
}
// Persist only the user-supplied fields
if persist {
entry, err := logical.StorageEntryJSON(raftAutopilotConfigurationStoragePath, config)
if err != nil {
return nil, err
}
if err := b.Core.barrier.Put(ctx, entry); err != nil {
return nil, err
}
}
// Set the effective config
raftStorage.SetAutopilotConfig(effectiveConf)
return nil, nil
}
}
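In short, the handler persists only what the caller supplied, while running with that overlaid on the current effective config; a condensed restatement (assuming Merge overlays the fields set in its argument):

partial := &raft.AutopilotConfig{ServerStabilizationTime: 50 * time.Second}
effective := raftStorage.AutopilotConfig() // clone of the running config
effective.Merge(partial)                   // overlay only the supplied fields
raftStorage.SetAutopilotConfig(effective)  // adopt; `partial` alone is what gets persisted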
func (b *SystemBackend) handleStorageRaftSnapshotWrite(force bool) framework.OperationFunc {
return func(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) {
raftStorage, ok := b.Core.underlyingPhysical.(*raft.RaftBackend)
@ -475,4 +673,12 @@ var sysRaftHelp = map[string][2]string{
"Force restore a raft cluster snapshot",
"",
},
"raft-autopilot-state": {
"Returns the state of the raft cluster under integrated storage as seen by autopilot.",
"",
},
"raft-autopilot-configuration": {
"Returns autopilot configuration.",
"",
},
}


@ -5,7 +5,6 @@ import (
"encoding/base64"
"errors"
"fmt"
"math"
"net/http"
"net/url"
"strings"
@ -35,53 +34,12 @@ var (
raftTLSStoragePath = "core/raft/tls"
raftTLSRotationPeriod = 24 * time.Hour
raftAutopilotConfigurationStoragePath = "core/raft/autopilot/configuration"
// TestingUpdateClusterAddr is used in tests to override the cluster address
TestingUpdateClusterAddr uint32
)
type raftFollowerStates struct {
l sync.RWMutex
followers map[string]uint64
}
func (s *raftFollowerStates) update(nodeID string, appliedIndex uint64) {
s.l.Lock()
s.followers[nodeID] = appliedIndex
s.l.Unlock()
}
func (s *raftFollowerStates) delete(nodeID string) {
s.l.RLock()
delete(s.followers, nodeID)
s.l.RUnlock()
}
func (s *raftFollowerStates) get(nodeID string) uint64 {
s.l.RLock()
index := s.followers[nodeID]
s.l.RUnlock()
return index
}
func (s *raftFollowerStates) minIndex() uint64 {
var min uint64 = math.MaxUint64
minFunc := func(a, b uint64) uint64 {
if a > b {
return b
}
return a
}
s.l.RLock()
for _, i := range s.followers {
min = minFunc(min, i)
}
s.l.RUnlock()
if min == math.MaxUint64 {
return 0
}
return min
}
func (c *Core) GetRaftIndexes() (committed uint64, applied uint64) {
c.stateLock.RLock()
defer c.stateLock.RUnlock()
@ -98,7 +56,7 @@ func (c *Core) GetRaftIndexes() (committed uint64, applied uint64) {
// up and enables the cluster handler.
func (c *Core) startRaftBackend(ctx context.Context) (retErr error) {
raftBackend := c.getRaftBackend()
if raftBackend == nil || raftBackend.Initialized() {
if raftBackend == nil {
return nil
}
@ -161,6 +119,7 @@ func (c *Core) startRaftBackend(ctx context.Context) (retErr error) {
}
raftBackend.SetRestoreCallback(c.raftSnapshotRestoreCallback(true, true))
if err := raftBackend.SetupCluster(ctx, raft.SetupOpts{
TLSKeyring: raftTLS,
ClusterListener: c.getClusterListener(),
@ -198,11 +157,35 @@ func (c *Core) startRaftBackend(ctx context.Context) (retErr error) {
}
func (c *Core) setupRaftActiveNode(ctx context.Context) error {
raftBackend := c.getRaftBackend()
if raftBackend == nil {
return nil
}
c.logger.Info("starting raft active node")
autopilotConfig, err := c.loadAutopilotConfiguration(ctx)
if err != nil {
c.logger.Error("failed to load autopilot config from storage when setting up cluster; continuing since autopilot falls back to default config", "error", err)
}
raftBackend.SetupAutopilot(c.activeContext, autopilotConfig, c.raftFollowerStates, c.disableAutopilot)
c.pendingRaftPeers = &sync.Map{}
return c.startPeriodicRaftTLSRotate(ctx)
}
func (c *Core) stopRaftActiveNode() {
raftBackend := c.getRaftBackend()
if raftBackend == nil {
return
}
c.logger.Info("stopping raft active node")
if !raftBackend.AutopilotDisabled() {
raftBackend.StopAutopilot()
}
c.pendingRaftPeers = nil
c.stopPeriodicRaftTLSRotate()
}
@ -334,9 +317,8 @@ func (c *Core) raftTLSRotateDirect(ctx context.Context, logger hclog.Logger, sto
// to reconnect with the cluster. Additionally, only one outstanding key
// is allowed for this same reason (max keyring size of 2).
func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raftBackend *raft.RaftBackend, stopCh chan struct{}) error {
followerStates := &raftFollowerStates{
followers: make(map[string]uint64),
}
followerStates := c.raftFollowerStates
followerStates.Clear()
// Pre-populate the follower list with the set of peers.
raftConfig, err := raftBackend.GetConfiguration(ctx)
@ -345,10 +327,9 @@ func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raf
}
for _, server := range raftConfig.Servers {
if server.NodeID != raftBackend.NodeID() {
followerStates.update(server.NodeID, 0)
followerStates.Update(server.NodeID, 0, 0, "voter")
}
}
c.raftFollowerStates = followerStates
// rotateKeyring writes new key data to the keyring and adds an applied
// index that is used to verify it has been committed. The keys written in
@ -437,7 +418,7 @@ func (c *Core) raftTLSRotatePhased(ctx context.Context, logger hclog.Logger, raf
case keyring.Keys[1].AppliedIndex != keyring.AppliedIndex:
// We haven't fully committed the new key, continue here
return nil
case followerStates.minIndex() < keyring.AppliedIndex:
case followerStates.MinIndex() < keyring.AppliedIndex:
// Not all the followers have applied the latest key
return nil
}
@ -574,7 +555,7 @@ func (c *Core) stopPeriodicRaftTLSRotate() {
close(c.raftTLSRotationStopCh)
}
c.raftTLSRotationStopCh = nil
c.raftFollowerStates = nil
c.raftFollowerStates.Clear()
}
func (c *Core) checkRaftTLSKeyUpgrades(ctx context.Context) error {
@ -716,6 +697,11 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
return false, errors.New("raft backend not in use")
}
if err := raftBackend.SetDesiredSuffrage(nonVoter); err != nil {
c.logger.Error("failed to set desired suffrage for this node", "error", err)
return false, nil
}
init, err := c.InitializedLocally(ctx)
if err != nil {
return false, errwrap.Wrapf("failed to check if core is initialized: {{err}}", err)
@ -731,7 +717,7 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
// Check on seal status and storage type before proceeding:
// If raft is used for storage, core needs to be sealed
if !isRaftHAOnly && !c.Sealed() {
c.logger.Error("node must be seal before joining")
c.logger.Error("node must be sealed before joining")
return false, errors.New("node must be sealed before joining")
}
@ -1105,10 +1091,11 @@ func (c *Core) joinRaftSendAnswer(ctx context.Context, sealAccess *seal.Access,
}
raftBackend.SetRestoreCallback(c.raftSnapshotRestoreCallback(true, true))
err = raftBackend.SetupCluster(ctx, raft.SetupOpts{
opts := raft.SetupOpts{
TLSKeyring: answerResp.Data.TLSKeyring,
ClusterListener: c.getClusterListener(),
})
}
err = raftBackend.SetupCluster(ctx, opts)
if err != nil {
return errwrap.Wrapf("failed to setup raft cluster: {{err}}", err)
}
@ -1116,6 +1103,24 @@ func (c *Core) joinRaftSendAnswer(ctx context.Context, sealAccess *seal.Access,
return nil
}
func (c *Core) loadAutopilotConfiguration(ctx context.Context) (*raft.AutopilotConfig, error) {
var autopilotConfig *raft.AutopilotConfig
entry, err := c.barrier.Get(ctx, raftAutopilotConfigurationStoragePath)
if err != nil {
return nil, err
}
if entry == nil {
return nil, nil
}
if err := jsonutil.DecodeJSON(entry.Value, &autopilotConfig); err != nil {
return nil, err
}
return autopilotConfig, nil
}
// RaftBootstrap performs bootstrapping of a raft cluster if core contains a raft
// backend. If raft is not part for the storage or HA storage backend, this
// call results in an error.


@ -8,6 +8,7 @@ import (
"time"
"github.com/hashicorp/vault/helper/forwarding"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/vault/replication"
)
@ -17,7 +18,7 @@ type forwardedRequestRPCServer struct {
handler http.Handler
perfStandbySlots chan struct{}
perfStandbyRepCluster *replication.Cluster
raftFollowerStates *raftFollowerStates
raftFollowerStates *raft.FollowerStates
}
func (s *forwardedRequestRPCServer) ForwardRequest(ctx context.Context, freq *forwarding.Request) (*forwarding.Response, error) {
@ -73,7 +74,7 @@ func (s *forwardedRequestRPCServer) Echo(ctx context.Context, in *EchoRequest) (
}
if in.RaftAppliedIndex > 0 && len(in.RaftNodeID) > 0 && s.raftFollowerStates != nil {
s.raftFollowerStates.update(in.RaftNodeID, in.RaftAppliedIndex)
s.raftFollowerStates.Update(in.RaftNodeID, in.RaftAppliedIndex, in.RaftTerm, in.RaftDesiredSuffrage)
}
reply := &EchoReply{
@ -116,6 +117,8 @@ func (c *forwardingClient) startHeartbeat() {
if !c.core.isRaftHAOnly() {
req.RaftAppliedIndex = raftBackend.AppliedIndex()
req.RaftNodeID = raftBackend.NodeID()
req.RaftTerm = raftBackend.Term()
req.RaftDesiredSuffrage = raftBackend.DesiredSuffrage()
}
}


@ -41,10 +41,12 @@ type EchoRequest struct {
ClusterAddr string `protobuf:"bytes,2,opt,name=cluster_addr,json=clusterAddr,proto3" json:"cluster_addr,omitempty"`
// ClusterAddrs is used to send up a list of cluster addresses to a dr
// primary from a dr secondary
ClusterAddrs []string `protobuf:"bytes,3,rep,name=cluster_addrs,json=clusterAddrs,proto3" json:"cluster_addrs,omitempty"`
RaftAppliedIndex uint64 `protobuf:"varint,4,opt,name=raft_applied_index,json=raftAppliedIndex,proto3" json:"raft_applied_index,omitempty"`
RaftNodeID string `protobuf:"bytes,5,opt,name=raft_node_id,json=raftNodeId,proto3" json:"raft_node_id,omitempty"`
NodeInfo *NodeInformation `protobuf:"bytes,6,opt,name=node_info,json=nodeInfo,proto3" json:"node_info,omitempty"`
ClusterAddrs []string `protobuf:"bytes,3,rep,name=cluster_addrs,json=clusterAddrs,proto3" json:"cluster_addrs,omitempty"`
RaftAppliedIndex uint64 `protobuf:"varint,4,opt,name=raft_applied_index,json=raftAppliedIndex,proto3" json:"raft_applied_index,omitempty"`
RaftNodeID string `protobuf:"bytes,5,opt,name=raft_node_id,json=raftNodeId,proto3" json:"raft_node_id,omitempty"`
NodeInfo *NodeInformation `protobuf:"bytes,6,opt,name=node_info,json=nodeInfo,proto3" json:"node_info,omitempty"`
RaftTerm uint64 `protobuf:"varint,7,opt,name=raft_term,json=raftTerm,proto3" json:"raft_term,omitempty"`
RaftDesiredSuffrage string `protobuf:"bytes,8,opt,name=raft_desired_suffrage,json=raftDesiredSuffrage,proto3" json:"raft_desired_suffrage,omitempty"`
}
func (x *EchoRequest) Reset() {
@ -121,6 +123,20 @@ func (x *EchoRequest) GetNodeInfo() *NodeInformation {
return nil
}
func (x *EchoRequest) GetRaftTerm() uint64 {
if x != nil {
return x.RaftTerm
}
return 0
}
func (x *EchoRequest) GetRaftDesiredSuffrage() string {
if x != nil {
return x.RaftDesiredSuffrage
}
return ""
}
type EchoReply struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
@ -490,8 +506,8 @@ var file_vault_request_forwarding_service_proto_rawDesc = []byte{
0x66, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69,
0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x05, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x1a,
0x1d, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, 0x2f, 0x66, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69,
0x6e, 0x67, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xf4,
0x01, 0x0a, 0x0b, 0x45, 0x63, 0x68, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18,
0x6e, 0x67, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xc5,
0x02, 0x0a, 0x0b, 0x45, 0x63, 0x68, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18,
0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6c, 0x75, 0x73,
0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b,
@ -506,73 +522,78 @@ var file_vault_request_forwarding_service_proto_rawDesc = []byte{
0x12, 0x33, 0x0a, 0x09, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x06, 0x20,
0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x4e, 0x6f, 0x64, 0x65,
0x49, 0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x6e, 0x6f, 0x64,
0x65, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0xfc, 0x01, 0x0a, 0x09, 0x45, 0x63, 0x68, 0x6f, 0x52, 0x65,
0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01,
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23, 0x0a,
0x0d, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x73, 0x18, 0x02,
0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x41, 0x64, 0x64,
0x72, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x72,
0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12,
0x2c, 0x0a, 0x12, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x65, 0x64, 0x5f,
0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x10, 0x72, 0x61, 0x66,
0x74, 0x41, 0x70, 0x70, 0x6c, 0x69, 0x65, 0x64, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x20, 0x0a,
0x0c, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x05, 0x20,
0x01, 0x28, 0x09, 0x52, 0x0a, 0x72, 0x61, 0x66, 0x74, 0x4e, 0x6f, 0x64, 0x65, 0x49, 0x64, 0x12,
0x33, 0x0a, 0x09, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x06, 0x20, 0x01,
0x28, 0x0b, 0x32, 0x16, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x49,
0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x6e, 0x6f, 0x64, 0x65,
0x49, 0x6e, 0x66, 0x6f, 0x22, 0xa9, 0x01, 0x0a, 0x0f, 0x4e, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6c, 0x75, 0x73,
0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b,
0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x61,
0x70, 0x69, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x61,
0x70, 0x69, 0x41, 0x64, 0x64, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03,
0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x6e, 0x6f,
0x64, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x6f, 0x64,
0x65, 0x49, 0x64, 0x12, 0x2b, 0x0a, 0x11, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69,
0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10,
0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x1b, 0x0a, 0x09, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x74, 0x65,
0x72, 0x6d, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, 0x72, 0x61, 0x66, 0x74, 0x54, 0x65,
0x72, 0x6d, 0x12, 0x32, 0x0a, 0x15, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x69, 0x72,
0x65, 0x64, 0x5f, 0x73, 0x75, 0x66, 0x66, 0x72, 0x61, 0x67, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28,
0x09, 0x52, 0x13, 0x72, 0x61, 0x66, 0x74, 0x44, 0x65, 0x73, 0x69, 0x72, 0x65, 0x64, 0x53, 0x75,
0x66, 0x66, 0x72, 0x61, 0x67, 0x65, 0x22, 0xfc, 0x01, 0x0a, 0x09, 0x45, 0x63, 0x68, 0x6f, 0x52,
0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18,
0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23,
0x0a, 0x0d, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x73, 0x18,
0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x41, 0x64,
0x64, 0x72, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69,
0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10,
0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65,
0x22, 0x49, 0x0a, 0x09, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a,
0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70,
0x65, 0x12, 0x0c, 0x0a, 0x01, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x78, 0x12,
0x0c, 0x0a, 0x01, 0x79, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x79, 0x12, 0x0c, 0x0a,
0x01, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x64, 0x22, 0x1a, 0x0a, 0x18, 0x50,
0x65, 0x72, 0x66, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69,
0x6f, 0x6e, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x22, 0xe9, 0x01, 0x0a, 0x1b, 0x50, 0x65, 0x72, 0x66,
0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52,
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20,
0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74,
0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75,
0x73, 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x30, 0x0a, 0x14, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72,
0x79, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x03,
0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x75,
0x73, 0x74, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x63, 0x61, 0x5f, 0x63,
0x65, 0x72, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x63, 0x61, 0x43, 0x65, 0x72,
0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x63, 0x65, 0x72, 0x74,
0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x65,
0x72, 0x74, 0x12, 0x2f, 0x0a, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6b, 0x65, 0x79,
0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x43,
0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x52, 0x09, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74,
0x4b, 0x65, 0x79, 0x32, 0xf0, 0x01, 0x0a, 0x11, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x46,
0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x3d, 0x0a, 0x0e, 0x46, 0x6f, 0x72,
0x77, 0x61, 0x72, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x13, 0x2e, 0x66, 0x6f,
0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
0x1a, 0x14, 0x2e, 0x66, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x2e, 0x52, 0x65,
0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x2e, 0x0a, 0x04, 0x45, 0x63, 0x68, 0x6f,
0x12, 0x12, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x63, 0x68, 0x6f, 0x52, 0x65, 0x71,
0x75, 0x65, 0x73, 0x74, 0x1a, 0x10, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x63, 0x68,
0x6f, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x6c, 0x0a, 0x21, 0x50, 0x65, 0x72, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x6e, 0x63, 0x65, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45, 0x6c,
0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x2e,
0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x50, 0x65, 0x72, 0x66, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62,
0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x1a, 0x22,
0x12, 0x2c, 0x0a, 0x12, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x65, 0x64,
0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x10, 0x72, 0x61,
0x66, 0x74, 0x41, 0x70, 0x70, 0x6c, 0x69, 0x65, 0x64, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x20,
0x0a, 0x0c, 0x72, 0x61, 0x66, 0x74, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x05,
0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x72, 0x61, 0x66, 0x74, 0x4e, 0x6f, 0x64, 0x65, 0x49, 0x64,
0x12, 0x33, 0x0a, 0x09, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x06, 0x20,
0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x4e, 0x6f, 0x64, 0x65,
0x49, 0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x6e, 0x6f, 0x64,
0x65, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0xa9, 0x01, 0x0a, 0x0f, 0x4e, 0x6f, 0x64, 0x65, 0x49, 0x6e,
0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6c, 0x75,
0x73, 0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x0b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x12, 0x19, 0x0a, 0x08,
0x61, 0x70, 0x69, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07,
0x61, 0x70, 0x69, 0x41, 0x64, 0x64, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18,
0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x6e,
0x6f, 0x64, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x6f,
0x64, 0x65, 0x49, 0x64, 0x12, 0x2b, 0x0a, 0x11, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74,
0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52,
0x10, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74,
0x65, 0x22, 0x49, 0x0a, 0x09, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x12, 0x12,
0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79,
0x70, 0x65, 0x12, 0x0c, 0x0a, 0x01, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x78,
0x12, 0x0c, 0x0a, 0x01, 0x79, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x79, 0x12, 0x0c,
0x0a, 0x01, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x01, 0x64, 0x22, 0x1a, 0x0a, 0x18,
0x50, 0x65, 0x72, 0x66, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74,
0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x22, 0xe9, 0x01, 0x0a, 0x1b, 0x50, 0x65, 0x72,
0x66, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e,
0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01,
0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73,
0x74, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c,
0x75, 0x73, 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x30, 0x0a, 0x14, 0x70, 0x72, 0x69, 0x6d, 0x61,
0x72, 0x79, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18,
0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x43, 0x6c,
0x75, 0x73, 0x74, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x63, 0x61, 0x5f,
0x63, 0x65, 0x72, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x63, 0x61, 0x43, 0x65,
0x72, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x63, 0x65, 0x72,
0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43,
0x65, 0x72, 0x74, 0x12, 0x2f, 0x0a, 0x0a, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6b, 0x65,
0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e,
0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x52, 0x09, 0x63, 0x6c, 0x69, 0x65, 0x6e,
0x74, 0x4b, 0x65, 0x79, 0x32, 0xf0, 0x01, 0x0a, 0x11, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
0x46, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x3d, 0x0a, 0x0e, 0x46, 0x6f,
0x72, 0x77, 0x61, 0x72, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x13, 0x2e, 0x66,
0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73,
0x74, 0x1a, 0x14, 0x2e, 0x66, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x67, 0x2e, 0x52,
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x2e, 0x0a, 0x04, 0x45, 0x63, 0x68,
0x6f, 0x12, 0x12, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x63, 0x68, 0x6f, 0x52, 0x65,
0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x10, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x63,
0x68, 0x6f, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x6c, 0x0a, 0x21, 0x50, 0x65, 0x72,
0x66, 0x6f, 0x72, 0x6d, 0x61, 0x6e, 0x63, 0x65, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x62, 0x79, 0x45,
0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f,
0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x50, 0x65, 0x72, 0x66, 0x53, 0x74, 0x61, 0x6e, 0x64,
0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e,
0x73, 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x22, 0x5a, 0x20, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62,
0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f, 0x76,
0x61, 0x75, 0x6c, 0x74, 0x2f, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x33,
0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x1a,
0x22, 0x2e, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x2e, 0x50, 0x65, 0x72, 0x66, 0x53, 0x74, 0x61, 0x6e,
0x64, 0x62, 0x79, 0x45, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f,
0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x22, 0x5a, 0x20, 0x67, 0x69, 0x74, 0x68, 0x75,
0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x68, 0x61, 0x73, 0x68, 0x69, 0x63, 0x6f, 0x72, 0x70, 0x2f,
0x76, 0x61, 0x75, 0x6c, 0x74, 0x2f, 0x76, 0x61, 0x75, 0x6c, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x33,
}
var (


@@ -18,6 +18,8 @@ message EchoRequest {
uint64 raft_applied_index = 4;
string raft_node_id = 5;
NodeInformation node_info = 6;
uint64 raft_term = 7;
string raft_desired_suffrage = 8;
}
message EchoReply {


@@ -1070,7 +1070,7 @@ type TestClusterOptions struct {
// core in cluster will have 0, second 1, etc.
// If the backend is shared across the cluster (i.e. is not Raft) then it
// should return nil when coreIdx != 0.
PhysicalFactory func(t testing.T, coreIdx int, logger log.Logger) *PhysicalBackendBundle
PhysicalFactory func(t testing.T, coreIdx int, logger log.Logger, conf map[string]interface{}) *PhysicalBackendBundle
// FirstCoreNumber is used to assign a unique number to each core within
// a multi-cluster setup.
FirstCoreNumber int
@@ -1091,6 +1091,8 @@ type TestClusterOptions struct {
RaftAddressProvider raftlib.ServerAddressProvider
CoreMetricSinkProvider func(clusterName string) (*metricsutil.ClusterMetricSink, *metricsutil.MetricsHelper)
PhysicalFactoryConfig map[string]interface{}
}
var DefaultNumCores = 3
@@ -1456,6 +1458,7 @@ func NewTestCluster(t testing.T, base *CoreConfig, opts *TestClusterOptions) *Te
coreConfig.SecureRandomReader = base.SecureRandomReader
coreConfig.DisableSentinelTrace = base.DisableSentinelTrace
coreConfig.ClusterName = base.ClusterName
coreConfig.DisableAutopilot = base.DisableAutopilot
if base.BuiltinRegistry != nil {
coreConfig.BuiltinRegistry = base.BuiltinRegistry
@@ -1763,7 +1766,7 @@ func (testCluster *TestCluster) newCore(t testing.T, idx int, coreConfig *CoreCo
localConfig.Logger = testCluster.Logger.Named(fmt.Sprintf("core%d", idx))
}
if opts != nil && opts.PhysicalFactory != nil {
physBundle := opts.PhysicalFactory(t, idx, localConfig.Logger)
physBundle := opts.PhysicalFactory(t, idx, localConfig.Logger, opts.PhysicalFactoryConfig)
switch {
case physBundle == nil && coreConfig.Physical != nil:
case physBundle == nil && coreConfig.Physical == nil:
@@ -1795,6 +1798,7 @@ func (testCluster *TestCluster) newCore(t testing.T, idx int, coreConfig *CoreCo
if opts != nil && opts.ClusterLayers != nil {
localConfig.ClusterNetworkLayer = opts.ClusterLayers.Layers()[idx]
localConfig.ClusterAddr = "https://" + localConfig.ClusterNetworkLayer.Listeners()[0].Addr().String()
}
switch {
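The testing.go changes above thread the new PhysicalFactoryConfig map through to the PhysicalFactory callback. A minimal sketch of a factory written against the new signature, assuming a shared (non-Raft) backend so only core 0 builds the bundle; newSharedBackendBundle and the config key are hypothetical, not part of this PR:

opts := &vault.TestClusterOptions{
	PhysicalFactoryConfig: map[string]interface{}{
		"autopilot_reconcile_interval": "300ms", // illustrative key only
	},
	PhysicalFactory: func(t testing.T, coreIdx int, logger log.Logger, conf map[string]interface{}) *vault.PhysicalBackendBundle {
		if coreIdx != 0 {
			return nil // shared backend: only the first core creates it
		}
		return newSharedBackendBundle(t, logger, conf) // hypothetical helper
	},
}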

vendor/github.com/hashicorp/raft-autopilot/LICENSE generated vendored Normal file

@@ -0,0 +1,373 @@
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

vendor/github.com/hashicorp/raft-autopilot/README.md generated vendored Normal file

@@ -0,0 +1,2 @@
# raft-autopilot
Raft Autopilot

vendor/github.com/hashicorp/raft-autopilot/autopilot.go generated vendored Normal file

@@ -0,0 +1,234 @@
package autopilot
import (
"context"
"sync"
"time"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/raft"
)
const (
// These constants were taken from what exists in Consul at the time of module extraction.
DefaultUpdateInterval = 2 * time.Second
DefaultReconcileInterval = 10 * time.Second
)
// Option is an option to be used when creating a new Autopilot instance
type Option func(*Autopilot)
// WithUpdateInterval returns an Option to set the Autopilot instance's
// update interval.
func WithUpdateInterval(t time.Duration) Option {
if t == 0 {
t = DefaultUpdateInterval
}
return func(a *Autopilot) {
a.updateInterval = t
}
}
// WithReconcileInterval returns an Option to set the Autopilot instance's
// reconcile interval.
func WithReconcileInterval(t time.Duration) Option {
if t == 0 {
t = DefaultReconcileInterval
}
return func(a *Autopilot) {
a.reconcileInterval = t
}
}
// WithLogger returns an Option to set the Autopilot instance's logger
func WithLogger(logger hclog.Logger) Option {
if logger == nil {
logger = hclog.Default()
}
return func(a *Autopilot) {
a.logger = logger.Named("autopilot")
}
}
// withTimeProvider returns an Option which overrides an Autopilot instance's
// time provider with the given one. This should only be used in tests
// as a means of making some time.Time values in an autopilot state deterministic.
// For real uses the default runtimeTimeProvider should be used.
func withTimeProvider(provider timeProvider) Option {
return func(a *Autopilot) {
a.time = provider
}
}
// WithPromoter returns an option to set the Promoter type that Autopilot will
// use. When the option is not given the default StablePromoter from this package
// will be used.
func WithPromoter(promoter Promoter) Option {
if promoter == nil {
promoter = DefaultPromoter()
}
return func(a *Autopilot) {
a.promoter = promoter
}
}
// ExecutionStatus represents the current status of the autopilot background go routines
type ExecutionStatus string
const (
NotRunning ExecutionStatus = "not-running"
Running ExecutionStatus = "running"
ShuttingDown ExecutionStatus = "shutting-down"
)
type execInfo struct {
// status is the current state of autopilot execution
status ExecutionStatus
// shutdown is a function that can be executed to shut down a running
// autopilot's go routines.
shutdown context.CancelFunc
// done is a chan that will be closed when the running autopilot go
// routines have exited. Technically closing it is the very last
// thing done in the go routine but at that point enough state has
// been cleaned up that we would then allow it to be started
// immediately afterward
done chan struct{}
}
// Autopilot is the type to manage a running Raft instance.
//
// Each Raft node in the cluster will have a corresponding Autopilot instance but
// only 1 Autopilot instance should run at a time in the cluster. So when a node
// gains Raft leadership the corresponding Autopilot instance should have its
// Start method called. Then if leadership is lost that node should call the
// Stop method on the Autopilot instance.
type Autopilot struct {
logger hclog.Logger
// delegate is used to get information about the system such as Raft server
// states, known servers etc.
delegate ApplicationIntegration
// promoter is used to calculate promotions, demotions and leadership transfers
// given a particular autopilot State. The interface also contains methods
// for filling in parts of the autopilot state that the core module doesn't
// control such as the Ext fields on the Server and State types.
promoter Promoter
// raft is an interface that implements all the parts of the Raft library interface
// that we use. It is an interface to allow for mocking raft during testing.
raft Raft
// time is an interface with a single method for getting the current time - `Now`.
// In some tests this will be the MockTimeProvider which allows tests to be more
// deterministic but for running systems this should not be overridden from the
// default which is the runtimeTimeProvider and is a small shim around calling
// time.Now.
time timeProvider
// reconcileInterval is how long between rounds of performing promotions, demotions
// and leadership transfers.
reconcileInterval time.Duration
// updateInterval is the time between the periodic state updates. These periodic
// state updates take in known servers from the delegate, request Raft stats be
// fetched and pull in other inputs such as the Raft configuration to create
// an updated view of the Autopilot State.
updateInterval time.Duration
// state is the structure that autopilot uses to make decisions about what to do.
// This field should be considered immutable and no modifications to an existing
// state should be made but instead a new state is created and set to this field
// while holding the stateLock.
state *State
// stateLock is meant to only protect the state field. This just prevents
// the periodic state update and consumers requesting the autopilot state from
// racing.
stateLock sync.RWMutex
// startTime is recorded so that we can make better determinations about server
// stability during the initial period of time after autopilot first starts.
// If autopilot has just started the default behavior to check if a server is
// stable will not work as it will ensure the server has been healthy for
// the configured server stabilization time. If that configure time is longer
// than the amount of time autopilot has been running you can run into issues
// with leadership flapping during some scenarios where a cluster is being
// brought up.
startTime time.Time
// removeDeadCh is used to trigger the running autopilot go routines to
// find and remove any dead/failed servers
removeDeadCh chan struct{}
// reconcileCh is used to trigger an immediate round of reconciliation.
reconcileCh chan struct{}
// leaderLock implements a cancellable mutex that will be used to ensure
// that only one autopilot go routine is the "leader". The leader is
// the go routine that is currently responsible for updating the
// autopilot state and performing raft promotions/demotions.
leaderLock *mutex
// execution is the information about the most recent autopilot execution.
// Start will initialize this with the most recent execution and it will
// be updated by Stop and by the go routines being executed when they are
// finished.
execution *execInfo
// execLock protects access to the execution field
execLock sync.Mutex
}
// New will create a new Autopilot instance utilizing the given Raft and Delegate.
// If the WithPromoter option is not provided the default StablePromoter will
// be used.
func New(raft Raft, delegate ApplicationIntegration, options ...Option) *Autopilot {
a := &Autopilot{
raft: raft,
delegate: delegate,
state: &State{},
promoter: DefaultPromoter(),
logger: hclog.Default().Named("autopilot"),
// should this be buffered?
removeDeadCh: make(chan struct{}, 1),
reconcileInterval: DefaultReconcileInterval,
updateInterval: DefaultUpdateInterval,
time: &runtimeTimeProvider{},
leaderLock: newMutex(),
}
for _, opt := range options {
opt(a)
}
return a
}
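A minimal consumer sketch (not part of this diff), assuming the raft instance and delegate are constructed elsewhere by the application:

package example // sketch only

import (
	"context"
	"time"

	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/raft"
	autopilot "github.com/hashicorp/raft-autopilot"
)

// runAutopilot wires autopilot into a consuming application.
func runAutopilot(ctx context.Context, r *raft.Raft, delegate autopilot.ApplicationIntegration) *autopilot.Autopilot {
	ap := autopilot.New(r, delegate,
		autopilot.WithLogger(hclog.Default()),
		autopilot.WithReconcileInterval(10*time.Second),
	)
	// Start when this node gains raft leadership; call ap.Stop() when it is lost.
	ap.Start(ctx)
	return ap
}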
// RemoveDeadServers will trigger an immediate removal of dead/failed servers.
func (a *Autopilot) RemoveDeadServers() {
select {
case a.removeDeadCh <- struct{}{}:
default:
}
}
// GetState retrieves the current autopilot State
func (a *Autopilot) GetState() *State {
a.stateLock.Lock()
defer a.stateLock.Unlock()
return a.state
}
// GetServerHealth returns the latest ServerHealth for a given server.
// The returned struct should not be modified or else it will impact autopilot's internal view of the server's health.
func (a *Autopilot) GetServerHealth(id raft.ServerID) *ServerHealth {
state := a.GetState()
srv, ok := state.Servers[id]
if ok {
return &srv.Health
}
return nil
}
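Read-side sketch (illustrative): consumers poll the cached autopilot state rather than raft directly; Healthy is the ServerHealth field used by the reconcile logic later in this diff.

if h := ap.GetServerHealth(id); h == nil || !h.Healthy {
	logger.Warn("server is missing or unhealthy", "id", id)
}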

vendor/github.com/hashicorp/raft-autopilot/go.mod generated vendored Normal file

@@ -0,0 +1,11 @@
module github.com/hashicorp/raft-autopilot
go 1.14
require (
github.com/hashicorp/go-hclog v0.14.1
github.com/hashicorp/raft v1.2.0
github.com/stretchr/testify v1.6.1
go.uber.org/goleak v1.1.10
golang.org/x/sync v0.0.0-20190423024810-112230192c58
)

vendor/github.com/hashicorp/raft-autopilot/go.sum generated vendored Normal file

@@ -0,0 +1,87 @@
github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM=
github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-hclog v0.14.1 h1:nQcJDQwIAGnmoUWp8ubocEX40cCml/17YkF6csQLReU=
github.com/hashicorp/go-hclog v0.14.1/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=
github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI=
github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/raft v1.2.0 h1:mHzHIrF0S91d3A7RPBvuqkgB4d/7oFJZyvf1Q4m7GA0=
github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
go.uber.org/goleak v1.1.10 h1:z+mqJhf6ss6BSfSM671tgKyZBFPTTJM+HLxnhPC3wu0=
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f h1:Bl/8QSvNqXvPGPGXa2z5xUTmV7VDcZyvRZ+QQXkXTZQ=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191008105621-543471e840be h1:QAcqgptGM8IQBC9K/RC4o+O9YmqEm0diQn9QmZw/0mU=
golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11 h1:Yq9t9jnGoR+dBuitxdo9l6Q7xh/zOyNnYUtDKaQ3x0E=
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

vendor/github.com/hashicorp/raft-autopilot/mutex.go generated vendored Normal file

@@ -0,0 +1,35 @@
/*
This code was taken from the same implementation in a branch from Consul and then
had the package updated and the mutex type unexported.
*/
package autopilot
import (
"context"
"golang.org/x/sync/semaphore"
)
type mutex semaphore.Weighted
// newMutex returns a mutex that is ready for use.
func newMutex() *mutex {
return (*mutex)(semaphore.NewWeighted(1))
}
func (m *mutex) Lock() {
_ = (*semaphore.Weighted)(m).Acquire(context.Background(), 1)
}
func (m *mutex) Unlock() {
(*semaphore.Weighted)(m).Release(1)
}
// TryLock acquires the mutex, blocking until resources are available or ctx is
// done. On success, returns nil. On failure, returns ctx.Err() and leaves the
// semaphore unchanged.
//
// If ctx is already done, Acquire may still succeed without blocking.
func (m *mutex) TryLock(ctx context.Context) error {
return (*semaphore.Weighted)(m).Acquire(ctx, 1)
}
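Usage sketch (illustrative): the context-aware acquisition lets a shutdown abort a goroutine waiting to become the autopilot "leader".

m := newMutex()
if err := m.TryLock(ctx); err != nil {
	return err // ctx was cancelled before the lock became available
}
defer m.Unlock()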

vendor/github.com/hashicorp/raft-autopilot/raft.go generated vendored Normal file

@@ -0,0 +1,201 @@
package autopilot
//
// The methods in this file are all mainly to provide synchronous methods
// for Raft operations that would normally return futures.
//
import (
"fmt"
"strconv"
"github.com/hashicorp/raft"
)
func requiredQuorum(voters int) int {
return (voters / 2) + 1
}
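A few worked values of this formula, for intuition:

// requiredQuorum(1) == 1 (a single voter is its own quorum)
// requiredQuorum(3) == 2 (tolerates one failed voter)
// requiredQuorum(4) == 3 (an even-sized cluster still tolerates only one)
// requiredQuorum(5) == 3 (tolerates two failed voters)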
// NumVoters is a helper for calculating the number of voting peers in the
// current raft configuration. This function ignores any autopilot state
// and will make the calculation based on a newly retrieved Raft configuration.
func (a *Autopilot) NumVoters() (int, error) {
cfg, err := a.getRaftConfiguration()
if err != nil {
return 0, err
}
var numVoters int
for _, server := range cfg.Servers {
if server.Suffrage == raft.Voter {
numVoters++
}
}
return numVoters, nil
}
// AddServer is a helper for adding a new server to the raft configuration.
// This may remove servers with duplicate addresses or IDs first, and after
// it's all done will trigger autopilot to remove dead servers if there
// are any. Servers added by this method will start in a non-voting
// state and later on autopilot will promote them to voting status
// if desired by the configured promoter. If too many removals would
// be required that would cause leadership loss then an error is returned
// instead of performing any Raft configuration changes.
func (a *Autopilot) AddServer(s *Server) error {
cfg, err := a.getRaftConfiguration()
if err != nil {
a.logger.Error("failed to get raft configuration", "error", err)
return err
}
var existingVoter bool
var voterRemovals []raft.ServerID
var nonVoterRemovals []raft.ServerID
var numVoters int
for _, server := range cfg.Servers {
if server.Suffrage == raft.Voter {
numVoters++
}
if server.Address == s.Address && server.ID == s.ID {
// nothing to be done as the addr and ID both already match
return nil
} else if server.ID == s.ID {
// special case for address updates only. In this case we should be
// able to update the configuration without having to first remove the server
if server.Suffrage == raft.Voter || server.Suffrage == raft.Staging {
existingVoter = true
}
} else if server.Address == s.Address {
if server.Suffrage == raft.Voter {
voterRemovals = append(voterRemovals, server.ID)
} else {
nonVoterRemovals = append(nonVoterRemovals, server.ID)
}
}
}
requiredVoters := requiredQuorum(numVoters)
if len(voterRemovals) > numVoters-requiredVoters {
return fmt.Errorf("Preventing server addition that would require removal of too many servers and cause cluster instability")
}
for _, id := range voterRemovals {
if err := a.removeServer(id); err != nil {
return fmt.Errorf("error removing server %q with duplicate address %q: %w", id, s.Address, err)
}
a.logger.Info("removed server with duplicate address", "address", s.Address)
}
for _, id := range nonVoterRemovals {
if err := a.removeServer(id); err != nil {
return fmt.Errorf("error removing server %q with duplicate address %q: %w", id, s.Address, err)
}
a.logger.Info("removed server with duplicate address", "address", s.Address)
}
if existingVoter {
if err := a.addVoter(s.ID, s.Address); err != nil {
return err
}
} else {
if err := a.addNonVoter(s.ID, s.Address); err != nil {
return err
}
}
// Trigger a check to remove dead servers
a.RemoveDeadServers()
return nil
}
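Caller-side sketch (the Server field names are taken from their uses elsewhere in this package; other fields omitted):

err := ap.AddServer(&autopilot.Server{
	ID:      raft.ServerID("node-4"),
	Name:    "node-4",
	Address: raft.ServerAddress("10.0.0.4:8201"),
})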
// RemoveServer is a helper to remove a server from Raft if it
// exists in the latest Raft configuration
func (a *Autopilot) RemoveServer(id raft.ServerID) error {
cfg, err := a.getRaftConfiguration()
if err != nil {
a.logger.Error("failed to get raft configuration", "error", err)
return err
}
// only remove servers currently in the configuration
for _, server := range cfg.Servers {
if server.ID == id {
return a.removeServer(server.ID)
}
}
return nil
}
// addNonVoter is a wrapper around calling the AddNonVoter method on the Raft
// interface object provided to Autopilot
func (a *Autopilot) addNonVoter(id raft.ServerID, addr raft.ServerAddress) error {
addFuture := a.raft.AddNonvoter(id, addr, 0, 0)
if err := addFuture.Error(); err != nil {
a.logger.Error("failed to add raft non-voting peer", "id", id, "address", addr, "error", err)
return err
}
return nil
}
// addVoter is a wrapper around calling the AddVoter method on the Raft
// interface object provided to Autopilot
func (a *Autopilot) addVoter(id raft.ServerID, addr raft.ServerAddress) error {
addFuture := a.raft.AddVoter(id, addr, 0, 0)
if err := addFuture.Error(); err != nil {
a.logger.Error("failed to add raft voting peer", "id", id, "address", addr, "error", err)
return err
}
return nil
}
func (a *Autopilot) demoteVoter(id raft.ServerID) error {
removeFuture := a.raft.DemoteVoter(id, 0, 0)
if err := removeFuture.Error(); err != nil {
a.logger.Error("failed to demote raft peer", "id", id, "error", err)
return err
}
return nil
}
// removeServer is a wrapper around calling the RemoveServer method on the
// Raft interface object provided to Autopilot
func (a *Autopilot) removeServer(id raft.ServerID) error {
a.logger.Debug("removing server by ID", "id", id)
future := a.raft.RemoveServer(id, 0, 0)
if err := future.Error(); err != nil {
a.logger.Error("failed to remove raft server",
"id", id,
"error", err,
)
return err
}
a.logger.Info("removed server", "id", id)
return nil
}
// getRaftConfiguration is a wrapper around calling the GetConfiguration method
// on the Raft interface object provided to Autopilot
func (a *Autopilot) getRaftConfiguration() (*raft.Configuration, error) {
configFuture := a.raft.GetConfiguration()
if err := configFuture.Error(); err != nil {
return nil, err
}
cfg := configFuture.Configuration()
return &cfg, nil
}
// lastTerm will retrieve the raft stats and then pull the last term value out of it
func (a *Autopilot) lastTerm() (uint64, error) {
return strconv.ParseUint(a.raft.Stats()["last_log_term"], 10, 64)
}
// leadershipTransfer will transfer leadership to the server with the specified id and address
func (a *Autopilot) leadershipTransfer(id raft.ServerID, address raft.ServerAddress) error {
a.logger.Info("Transferring leadership to new server", "id", id, "address", address)
future := a.raft.LeadershipTransferToServer(id, address)
return future.Error()
}

vendor/github.com/hashicorp/raft-autopilot/reconcile.go generated vendored Normal file

@@ -0,0 +1,281 @@
package autopilot
import (
"fmt"
"sort"
"github.com/hashicorp/raft"
)
// reconcile calculates and then applies promotions and demotions
func (a *Autopilot) reconcile() error {
conf := a.delegate.AutopilotConfig()
if conf == nil {
return nil
}
// grab the current state while locked
a.stateLock.Lock()
state := a.state
a.stateLock.Unlock()
if state == nil || state.Leader == "" {
return fmt.Errorf("Cannote reconcile Raft server voting rights without a valid autopilot state")
}
// have the promoter calculate the required Raft changeset.
changes := a.promoter.CalculatePromotionsAndDemotions(conf, state)
// apply the promotions, if we did apply any then stop here
// as we do not want to apply the demotions at the same time
// as a means of preventing cluster instability.
if done, err := a.applyPromotions(state, changes); done {
return err
}
// apply the demotions, if we did apply any then stop here
// as we do not want to transition leadership and do demotions
// at the same time. This is a preventative measure to maintain
// cluster stability.
if done, err := a.applyDemotions(state, changes); done {
return err
}
// if no leadership transfer is desired then we can exit the method now.
if changes.Leader == "" || changes.Leader == state.Leader {
return nil
}
// lookup the server we want to transfer leadership to
srv, ok := state.Servers[changes.Leader]
if !ok {
return fmt.Errorf("cannot transfer leadership to an unknown server with ID %s", changes.Leader)
}
// perform the leadership transfer
return a.leadershipTransfer(changes.Leader, srv.Server.Address)
}
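To illustrate the one-category-per-round rule (hypothetical values): if the promoter returns both a promotion and a leadership transfer, reconcile applies only the promotion this round and picks up the transfer on a later pass once the cluster has settled.

changes := autopilot.RaftChanges{
	Promotions: []raft.ServerID{"node-3"}, // applied this round
	Leader:     "node-2",                  // deferred to a later round
}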
// applyPromotions will apply all the promotions in the RaftChanges parameter.
//
// IDs in the change set will be ignored if:
// * The server isn't tracked in the provided state
// * The server already has voting rights
// * The server is not healthy
//
// If any servers were promoted this function returns true for the bool value.
func (a *Autopilot) applyPromotions(state *State, changes RaftChanges) (bool, error) {
promoted := false
for _, change := range changes.Promotions {
srv, found := state.Servers[change]
if !found {
a.logger.Debug("Ignoring promotion of server as it is not in the autopilot state", "id", change)
// this shouldn't be able to happen but is a nice safety measure against the
// delegate doing something less than desirable
continue
}
if srv.HasVotingRights() {
// There is no need to promote as this server is already a voter.
// No logging is needed here as this could be a very common case
// where the promoter just returns a list of server IDs that should
// be voters and non-voters without caring about which ones currently
// already are in that state.
a.logger.Debug("Not promoting server that already has voting rights", "id", change)
continue
}
if !srv.Health.Healthy {
// do not promote unhealthy servers
a.logger.Debug("Ignoring promotion of unhealthy server", "id", change)
continue
}
a.logger.Info("Promoting server", "id", srv.Server.ID, "address", srv.Server.Address, "name", srv.Server.Name)
if err := a.addVoter(srv.Server.ID, srv.Server.Address); err != nil {
return true, fmt.Errorf("failed promoting server %s: %v", srv.Server.ID, err)
}
promoted = true
}
// when we promoted anything we return true to indicate that the promotion/demotion applying
// process is finished to prevent promotions and demotions in the same round. This is what
// autopilot within Consul used to do so I am keeping the behavior the same for now.
return promoted, nil
}
// applyDemotions will apply all the demotions in the RaftChanges parameter.
//
// IDs in the change set will be ignored if:
// * The server isn't tracked in the provided state
// * The server does not have voting rights
//
// If any servers were demoted this function returns true for the bool value.
func (a *Autopilot) applyDemotions(state *State, changes RaftChanges) (bool, error) {
demoted := false
for _, change := range changes.Demotions {
srv, found := state.Servers[change]
if !found {
a.logger.Debug("Ignoring demotion of server as it is not in the autopilot state", "id", change)
// this shouldn't be able to happen but is a nice safety measure against the
// delegate doing something less than desirable
continue
}
if srv.State == RaftNonVoter {
// There is no need to demote as this server is already a non-voter.
// No logging is needed here as this could be a very common case
// where the promoter just returns a list of server IDs that should
// be voters and non-voters without caring about which ones currently
// already are in that state.
a.logger.Debug("Ignoring demotion of server that is already a non-voter", "id", change)
continue
}
a.logger.Info("Demoting server", "id", srv.Server.ID, "address", srv.Server.Address, "name", srv.Server.Name)
if err := a.demoteVoter(srv.Server.ID); err != nil {
return true, fmt.Errorf("failed demoting server %s: %v", srv.Server.ID, err)
}
demoted = true
}
// similarly to applyPromotions here we want to stop the process and prevent leadership
// transfer when any demotions took place. Basically we want to ensure the cluster is
// stable before doing the transfer
return demoted, nil
}
// getFailedServers aggregates all of the information about servers that the consuming application believes are in
// a failed/left state (indicated by the NodeStatus field on the Server type) as well as stale servers that are
// in the raft configuration but not known to the consuming application. This function will do nothing with
// that information and is purely to collect the data.
func (a *Autopilot) getFailedServers() (*FailedServers, int, error) {
staleRaftServers := make(map[raft.ServerID]raft.Server)
raftConfig, err := a.getRaftConfiguration()
if err != nil {
return nil, 0, err
}
// Populate a map of all the raft servers. We will
// remove some later on from the map leaving us with
// just the stale servers.
var voters int
for _, server := range raftConfig.Servers {
staleRaftServers[server.ID] = server
if server.Suffrage == raft.Voter {
voters++
}
}
var failed FailedServers
for id, srv := range a.delegate.KnownServers() {
raftSrv, found := staleRaftServers[id]
if found {
delete(staleRaftServers, id)
}
if srv.NodeStatus != NodeAlive {
if found && raftSrv.Suffrage == raft.Voter {
failed.FailedVoters = append(failed.FailedVoters, srv)
} else if found {
failed.FailedNonVoters = append(failed.FailedNonVoters, srv)
}
}
}
for id, srv := range staleRaftServers {
if srv.Suffrage == raft.Voter {
failed.StaleVoters = append(failed.StaleVoters, id)
} else {
failed.StaleNonVoters = append(failed.StaleNonVoters, id)
}
}
sort.Slice(failed.StaleNonVoters, func(i, j int) bool {
return failed.StaleNonVoters[i] < failed.StaleNonVoters[j]
})
sort.Slice(failed.StaleVoters, func(i, j int) bool {
return failed.StaleVoters[i] < failed.StaleVoters[j]
})
sort.Slice(failed.FailedNonVoters, func(i, j int) bool {
return failed.FailedNonVoters[i].ID < failed.FailedNonVoters[j].ID
})
sort.Slice(failed.FailedVoters, func(i, j int) bool {
return failed.FailedVoters[i].ID < failed.FailedVoters[j].ID
})
return &failed, voters, nil
}
// pruneDeadServers will find stale raft servers and failed servers as indicated by the consuming application
// and remove them. For stale raft servers this means removing them from the Raft configuration. For failed
// servers this means issuing RemoveFailedServer calls to the delegate. All stale/failed non-voters will be
// removed first, then stale voters and finally failed voters. For servers with voting rights we will
// cap the number removed so that we do not remove too many at a time and do not remove nodes to the
// point where the number of voters would be below the MinQuorum value from the autopilot config.
// Additionally the promoter will be consulted to determine if all of the removals should be done and
// can filter the failed server listings if need be.
func (a *Autopilot) pruneDeadServers() error {
conf := a.delegate.AutopilotConfig()
if conf == nil || !conf.CleanupDeadServers {
return nil
}
state := a.GetState()
failed, voters, err := a.getFailedServers()
if err != nil || failed == nil {
return err
}
failed = a.promoter.FilterFailedServerRemovals(conf, state, failed)
// remove failed non voting servers
for _, srv := range failed.FailedNonVoters {
a.logger.Info("Attempting removal of failed server node", "id", srv.ID, "name", srv.Name, "address", srv.Address)
a.delegate.RemoveFailedServer(srv)
}
// remove stale non voters
for _, id := range failed.StaleNonVoters {
a.logger.Debug("removing stale raft server from configuration", "id", id)
if err := a.removeServer(id); err != nil {
return err
}
}
maxRemoval := (voters - 1) / 2
for _, id := range failed.StaleVoters {
if voters-1 < int(conf.MinQuorum) {
a.logger.Debug("will not remove server as it would leave less voters than the minimum number allowed", "id", id, "min", conf.MinQuorum)
} else if maxRemoval < 1 {
a.logger.Debug("will not remove server as removal of a majority or servers is not safe", "id", id)
} else if err := a.removeServer(id); err != nil {
return err
} else {
maxRemoval--
voters--
}
}
for _, srv := range failed.FailedVoters {
if voters-1 < int(conf.MinQuorum) {
a.logger.Debug("will not remove server as it would leave less voters than the minimum number allowed", "id", srv.ID, "min", conf.MinQuorum)
} else if maxRemoval < 1 {
a.logger.Debug("will not remove server as its removal would be unsafe due to affectingas removal of a majority or servers is not safe", "id", srv.ID)
} else {
a.logger.Info("Attempting removal of failed server node", "id", srv.ID, "name", srv.Name, "address", srv.Address)
a.delegate.RemoveFailedServer(srv)
maxRemoval--
voters--
}
}
return nil
}
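
To make the voter-removal cap above concrete: with five voters, maxRemoval = (5-1)/2 = 2, so at most two voters may be pruned per pass, and each removal also re-checks MinQuorum. A minimal standalone sketch of the same guard logic (illustrative only, not part of the vendored code):

package main

import "fmt"

// canRemoveVoter mirrors the guard in pruneDeadServers: never drop below the
// configured MinQuorum and never remove a majority of the current voters in
// a single pass.
func canRemoveVoter(voters, maxRemoval int, minQuorum uint) bool {
	if voters-1 < int(minQuorum) {
		return false // would leave fewer voters than the configured minimum
	}
	return maxRemoval >= 1
}

func main() {
	voters, minQuorum := 5, uint(3)
	maxRemoval := (voters - 1) / 2 // 2: at most two voter removals this pass
	for i := 1; i <= 3; i++ {
		ok := canRemoveVoter(voters, maxRemoval, minQuorum)
		fmt.Printf("removal %d allowed: %v\n", i, ok)
		if ok {
			maxRemoval--
			voters--
		}
	}
	// prints: removal 1 allowed: true, removal 2 allowed: true,
	// removal 3 allowed: false (voters would fall below MinQuorum)
}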

vendor/github.com/hashicorp/raft-autopilot/run.go generated vendored Normal file

@ -0,0 +1,178 @@
package autopilot
import (
"context"
"time"
)
// Start will launch the go routines in the background to perform Autopilot.
// When the context passed in is cancelled or the Stop method is called
// then these routines will exit.
func (a *Autopilot) Start(ctx context.Context) {
a.execLock.Lock()
defer a.execLock.Unlock()
// already running so there is nothing to do
if a.execution != nil && a.execution.status == Running {
return
}
ctx, shutdown := context.WithCancel(ctx)
a.startTime = a.time.Now()
exec := &execInfo{
status: Running,
shutdown: shutdown,
done: make(chan struct{}),
}
if a.execution == nil || a.execution.status == NotRunning {
// In theory with a nil execution or the current execution being in the not
// running state, we should be able to immediately gain the leader lock as
// nothing else should be running and holding the lock. While that should hold, we still
// gain the lock to ensure that only one thread may even attempt to be
// modifying the autopilot state at once.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if err := a.leaderLock.TryLock(ctx); err == nil {
a.updateState(ctx)
a.leaderLock.Unlock()
}
}
go a.beginExecution(ctx, exec)
a.execution = exec
return
}
// Stop will terminate the go routines being executed to perform autopilot.
func (a *Autopilot) Stop() <-chan struct{} {
a.execLock.Lock()
defer a.execLock.Unlock()
// Nothing to do
if a.execution == nil || a.execution.status == NotRunning {
done := make(chan struct{})
close(done)
return done
}
a.execution.shutdown()
a.execution.status = ShuttingDown
return a.execution.done
}
// IsRunning returns the current execution status of the autopilot
// go routines as well as a chan which will be closed when the
// routines are no longer running
func (a *Autopilot) IsRunning() (ExecutionStatus, <-chan struct{}) {
a.execLock.Lock()
defer a.execLock.Unlock()
if a.execution == nil || a.execution.status == NotRunning {
done := make(chan struct{})
close(done)
return NotRunning, done
}
return a.execution.status, a.execution.done
}
func (a *Autopilot) finishExecution(exec *execInfo) {
// need to gain the lock because if this was the active execution
// then these values may be read while they are updated.
a.execLock.Lock()
defer a.execLock.Unlock()
exec.shutdown = nil
exec.status = NotRunning
// this should be the final cleanup task as it is what notifies the rest
// of the world that we are now done
close(exec.done)
exec.done = nil
}
func (a *Autopilot) beginExecution(ctx context.Context, exec *execInfo) {
// This will wait for any other go routine to finish executing
// before running any code ourselves to prevent any conflicting
// activity between the two.
if err := a.leaderLock.TryLock(ctx); err != nil {
a.finishExecution(exec)
return
}
a.logger.Debug("autopilot is now running")
// autopilot needs to do 3 things
//
// 1. periodically update the cluster state
// 2. periodically check for and perform promotions and demotions
// 3. Respond to servers leaving and prune dead servers
//
// We could attempt to do all of this in a single go routine except that
// updating the cluster health could potentially take long enough to impact
// the periodicity of the promotions and demotions performed by task 2/3.
// So instead this go routine will spawn a second go routine to manage
// updating the cluster health in the background. This go routine is still
// in control of the overall running status and will not exit until the
// child go routine has exited.
// child go routine for cluster health updating
stateUpdaterDone := make(chan struct{})
go a.runStateUpdater(ctx, stateUpdaterDone)
// cleanup for once we are stopped
defer func() {
// block waiting for our child go routine to also finish
<-stateUpdaterDone
a.logger.Debug("autopilot is now stopped")
a.finishExecution(exec)
a.leaderLock.Unlock()
}()
reconcileTicker := time.NewTicker(a.reconcileInterval)
defer reconcileTicker.Stop()
for {
select {
case <-ctx.Done():
return
case <-reconcileTicker.C:
if err := a.reconcile(); err != nil {
a.logger.Error("Failed to reconcile current state with the desired state")
}
if err := a.pruneDeadServers(); err != nil {
a.logger.Error("Failed to prune dead servers", "error", err)
}
case <-a.removeDeadCh:
if err := a.pruneDeadServers(); err != nil {
a.logger.Error("Failed to prune dead servers", "error", err)
}
}
}
}
// runStateUpdater will periodically update the autopilot state until the context
// passed in is cancelled. When finished the provided done chan will be closed.
func (a *Autopilot) runStateUpdater(ctx context.Context, done chan struct{}) {
a.logger.Debug("state update routine is now running")
defer func() {
a.logger.Debug("state update routine is now stopped")
close(done)
}()
ticker := time.NewTicker(a.updateInterval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
a.updateState(ctx)
}
}
}
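
For orientation, a hedged sketch of how a consuming application might drive this lifecycle. It assumes an *Autopilot value already built with the package's constructor and a delegate implementing ApplicationIntegration; those setup details are elided here and vary by application:

package example

import (
	"context"
	"fmt"

	autopilot "github.com/hashicorp/raft-autopilot"
)

// runWhileLeader starts autopilot when leadership is gained and stops it,
// waiting for both go routines to exit, when the context is cancelled.
func runWhileLeader(ctx context.Context, ap *autopilot.Autopilot) {
	ap.Start(ctx) // spawns the reconcile loop and the state-update routine

	status, _ := ap.IsRunning()
	fmt.Println("autopilot status:", status)

	<-ctx.Done() // leadership lost or server shutting down
	<-ap.Stop()  // request shutdown and block until the routines finish
}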

vendor/github.com/hashicorp/raft-autopilot/stable_promoter.go generated vendored Normal file

@ -0,0 +1,54 @@
package autopilot
import (
"time"
"github.com/hashicorp/raft"
)
func DefaultPromoter() Promoter {
return new(StablePromoter)
}
type StablePromoter struct{}
func (_ *StablePromoter) GetServerExt(_ *Config, srv *ServerState) interface{} {
return nil
}
func (_ *StablePromoter) GetStateExt(_ *Config, _ *State) interface{} {
return nil
}
func (_ *StablePromoter) GetNodeTypes(_ *Config, s *State) map[raft.ServerID]NodeType {
types := make(map[raft.ServerID]NodeType)
for id := range s.Servers {
// this basic implementation has all nodes be of the "voter" type regardless of
// any other settings. That means that in a healthy state all nodes in the cluster
// will be voters.
types[id] = NodeVoter
}
return types
}
func (_ *StablePromoter) FilterFailedServerRemovals(_ *Config, _ *State, failed *FailedServers) *FailedServers {
return failed
}
// CalculatePromotionsAndDemotions will return a list of all promotions and demotions to be done as well as the server id of
// the desired leader. This particular interface implementation maintains a stable leader and will promote healthy servers
// to voting status. It will never change the leader ID nor will it perform demotions.
func (_ *StablePromoter) CalculatePromotionsAndDemotions(c *Config, s *State) RaftChanges {
var changes RaftChanges
now := time.Now()
minStableDuration := s.ServerStabilizationTime(c)
for id, server := range s.Servers {
// ignore staging state as they are not ready yet
if server.State == RaftNonVoter && server.Health.IsStable(now, minStableDuration) {
changes.Promotions = append(changes.Promotions, id)
}
}
return changes
}
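
Applications needing different behavior can supply their own Promoter. A hedged sketch of one approach, embedding StablePromoter so only the promotion calculation changes; the one-promotion-per-pass cap here is an invented example policy, not something this library provides:

package example

import (
	autopilot "github.com/hashicorp/raft-autopilot"
)

// cappedPromoter reuses the stable promoter's behavior but promotes at most
// one server per reconcile pass, which an application might prefer in order
// to keep Raft configuration churn low.
type cappedPromoter struct {
	autopilot.StablePromoter
}

func (p *cappedPromoter) CalculatePromotionsAndDemotions(c *autopilot.Config, s *autopilot.State) autopilot.RaftChanges {
	changes := p.StablePromoter.CalculatePromotionsAndDemotions(c, s)
	if len(changes.Promotions) > 1 {
		changes.Promotions = changes.Promotions[:1]
	}
	return changes
}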

vendor/github.com/hashicorp/raft-autopilot/state.go generated vendored Normal file

@ -0,0 +1,398 @@
package autopilot
import (
"context"
"fmt"
"sort"
"time"
"github.com/hashicorp/raft"
)
// aliveServers will filter the input map of servers and output one with all of the
// servers in a Left state removed.
func aliveServers(servers map[raft.ServerID]*Server) map[raft.ServerID]*Server {
serverMap := make(map[raft.ServerID]*Server)
for _, server := range servers {
if server.NodeStatus == NodeLeft {
continue
}
serverMap[server.ID] = server
}
return serverMap
}
// nextStateInputs is the collection of values that can influence
// creation of the next State.
type nextStateInputs struct {
Now time.Time
StartTime time.Time
Config *Config
RaftConfig *raft.Configuration
KnownServers map[raft.ServerID]*Server
LatestIndex uint64
LastTerm uint64
FetchedStats map[raft.ServerID]*ServerStats
LeaderID raft.ServerID
}
// gatherNextStateInputs gathers all the information that would be used to
// create the new updated state from.
//
// - Time Provider's current time.
// - Autopilot Config (needed to determine if the stats should indicate unhealthiness)
// - Current state
// - Raft Configuration
// - Known Servers
// - Latest raft index (gathered right before the remote server stats so that they should
// be from about the same point in time)
// - Stats for all non-left servers
func (a *Autopilot) gatherNextStateInputs(ctx context.Context) (*nextStateInputs, error) {
// there are a lot of inputs to computing the next state so they get put into a
// struct so that we don't have to return 8 values.
inputs := &nextStateInputs{
Now: a.time.Now(),
StartTime: a.startTime,
}
// grab the latest autopilot configuration
config := a.delegate.AutopilotConfig()
if config == nil {
return nil, fmt.Errorf("delegate did not return an Autopilot configuration")
}
inputs.Config = config
// retrieve the raft configuration
raftConfig, err := a.getRaftConfiguration()
if err != nil {
return nil, fmt.Errorf("failed to get the Raft configuration: %w", err)
}
inputs.RaftConfig = raftConfig
leader := a.raft.Leader()
for _, s := range inputs.RaftConfig.Servers {
if s.Address == leader {
inputs.LeaderID = s.ID
break
}
}
if inputs.LeaderID == "" {
return nil, fmt.Errorf("cannot detect the current leader server id from its address: %s", leader)
}
// get the latest Raft index - this should be kept close to the call to
// fetch the statistics so that the index values are as close in time as
// possible to make the best decision regarding an individual server's
// healthiness.
inputs.LatestIndex = a.raft.LastIndex()
term, err := a.lastTerm()
if err != nil {
return nil, fmt.Errorf("failed to determine the last Raft term: %w", err)
}
inputs.LastTerm = term
// getting the raft configuration could block for a while so now is a good
// time to check for context cancellation
if ctx.Err() != nil {
return nil, ctx.Err()
}
// get the known servers which may include left/failed ones
inputs.KnownServers = a.delegate.KnownServers()
// in most cases getting the known servers should be quick but as we cannot
// account for every potential delegate and prevent them from making
// blocking network requests we should probably check the context again.
if ctx.Err() != nil {
return nil, ctx.Err()
}
// we only allow the fetch to take place for up to half the health interval.
// The next health interval will attempt to fetch the stats again, but if
// we do not see responses within this time then we can assume they are
// unhealthy
d := inputs.Now.Add(a.updateInterval / 2)
fetchCtx, cancel := context.WithDeadline(ctx, d)
defer cancel()
inputs.FetchedStats = a.delegate.FetchServerStats(fetchCtx, aliveServers(inputs.KnownServers))
// it might be nil but we propagate the ctx.Err just in case our context was
// cancelled since the last time we checked.
return inputs, ctx.Err()
}
// nextState will gather many inputs about the current state of servers from the
// delegate, raft and time provider among other sources and then compute the
// next Autopilot state.
func (a *Autopilot) nextState(ctx context.Context) (*State, error) {
inputs, err := a.gatherNextStateInputs(ctx)
if err != nil {
return nil, err
}
state := a.nextStateWithInputs(inputs)
if state.Leader == "" {
return nil, fmt.Errorf("Unabled to detect the leader server")
}
return state, nil
}
// nextStateWithInputs computes the next state given pre-gathered inputs
func (a *Autopilot) nextStateWithInputs(inputs *nextStateInputs) *State {
nextServers := a.nextServers(inputs)
newState := &State{
startTime: inputs.StartTime,
Healthy: true,
Servers: nextServers,
}
voterCount := 0
healthyVoters := 0
// This loop will
// 1. Determine the ID of the leader server and set it in the state
// 2. Count the number of voters in the cluster
// 3. Count the number of healthy voters in the cluster
// 4. Detect unhealthy servers and mark the overall health as false
for id, srv := range nextServers {
if !srv.Health.Healthy {
// any unhealthiness results in overall unhealthiness
newState.Healthy = false
}
switch srv.State {
case RaftLeader:
newState.Leader = id
fallthrough
case RaftVoter:
newState.Voters = append(newState.Voters, id)
voterCount++
if srv.Health.Healthy {
healthyVoters++
}
}
}
// If we have extra healthy voters, update FailureTolerance from its
// zero value in the struct.
requiredQuorum := requiredQuorum(voterCount)
if healthyVoters > requiredQuorum {
newState.FailureTolerance = healthyVoters - requiredQuorum
}
// update any promoter specific overall state
if newExt := a.promoter.GetStateExt(inputs.Config, newState); newExt != nil {
newState.Ext = newExt
}
// update the node types - these are really informational for users to
// know how autopilot and the associated promoter algorithms have classified
// each server, as some promotion algorithms may want to keep certain
// servers as non-voters for various reasons. The node type then can be used
// to indicate why that might be happening.
for id, typ := range a.promoter.GetNodeTypes(inputs.Config, newState) {
if srv, ok := newState.Servers[id]; ok {
srv.Server.NodeType = typ
}
}
// Sort the voters list to keep the output stable. This is done near the end
// as SortServers may use other parts of the state that were created in
// this method and populated in the newState. Requiring output stability
// helps make tests easier to manage and means that if you happen to be dumping
// the state periodically you shouldn't see things change unless there
// are real changes to server health or overall configuration.
SortServers(newState.Voters, newState)
return newState
}
// nextServers will build out the servers map for the next state to be created
// from the given inputs. This will take into account all the various sources
// of partial state (current state, raft config, application known servers etc.)
// and combine them into the final server map.
func (a *Autopilot) nextServers(inputs *nextStateInputs) map[raft.ServerID]*ServerState {
newServers := make(map[raft.ServerID]*ServerState)
for _, srv := range inputs.RaftConfig.Servers {
state := a.buildServerState(inputs, srv)
// update any promoter specific information. This isn't done within
// buildServerState to keep that function "pure" and not require
// mocking for tests
if newExt := a.promoter.GetServerExt(inputs.Config, &state); newExt != nil {
state.Server.Ext = newExt
}
newServers[srv.ID] = &state
}
return newServers
}
// buildServerState takes all the nextStateInputs and builds out a ServerState
// for the given Raft server. This will take into account the raft configuration,
// existing state, application known servers and recently fetched stats.
func (a *Autopilot) buildServerState(inputs *nextStateInputs, srv raft.Server) ServerState {
// Note that the ordering of operations in this method is very important.
// We are building up the ServerState from the least important sources
// and overriding them with more up to date values.
// build the basic state from the Raft server
state := ServerState{
Server: Server{
ID: srv.ID,
Address: srv.Address,
},
}
switch srv.Suffrage {
case raft.Voter:
state.State = RaftVoter
case raft.Nonvoter:
state.State = RaftNonVoter
case raft.Staging:
state.State = RaftStaging
default:
// should be impossible unless the constants in Raft were updated
// to have a new state.
// TODO (mkeeler) maybe a panic would be better here. The downside is
// that it would be hard to catch that in tests when updating the Raft
// version.
state.State = RaftNone
}
// overwrite the raft state to mark the leader as such instead of just
// a regular voter
if srv.ID == inputs.LeaderID {
state.State = RaftLeader
}
var previousHealthy *bool
a.stateLock.RLock()
// copy some state from an existing server into the new state - most of this
// should be overridden soon but at this point we are just building the base.
if existing, found := a.state.Servers[srv.ID]; found {
state.Stats = existing.Stats
state.Health = existing.Health
previousHealthy = &state.Health.Healthy
// it is important to note that the map values we retrieved this from are
// stored by value. Therefore we are modifying a copy of what is in the existing
// state and not the actual state itself. We want to ensure that the Address
// is what Raft will know about.
state.Server = existing.Server
state.Server.Address = srv.Address
}
a.stateLock.RUnlock()
// pull in the latest information from the application's knowledge of the
// server. Mainly we want the NodeStatus & Meta
if known, found := inputs.KnownServers[srv.ID]; found {
// it is important to note that we are modifying a copy of a Server as the
// map we retrieved this from has a non-pointer type value. We definitely
// do not want to modify the current known servers but we do want to ensure
// that we do not overwrite the Address
state.Server = *known
state.Server.Address = srv.Address
} else {
// TODO (mkeeler) do we need a None state. In the previous autopilot code
// we would have set this to serf.StatusNone
state.Server.NodeStatus = NodeLeft
}
// override the Stats if any were in the fetched results
if stats, found := inputs.FetchedStats[srv.ID]; found {
state.Stats = *stats
}
// now populate the healthy field given the stats
state.Health.Healthy = state.isHealthy(inputs.LastTerm, inputs.LatestIndex, inputs.Config)
// overwrite the StableSince field if this is a new server or when
// the health status changes. No need for an else as we previously set
// it when we overwrote the whole Health structure when finding a
// server in the existing state
if previousHealthy == nil || *previousHealthy != state.Health.Healthy {
state.Health.StableSince = inputs.Now
}
return state
}
// updateState will compute the nextState, set it on the Autopilot instance and
// then notify the delegate of the update.
func (a *Autopilot) updateState(ctx context.Context) {
newState, err := a.nextState(ctx)
if err != nil {
a.logger.Error("Error when computing next state", "error", err)
return
}
a.stateLock.Lock()
defer a.stateLock.Unlock()
a.state = newState
a.delegate.NotifyState(newState)
}
// SortServers will take a list of raft ServerIDs and sort it using
// information from the State. See the ServerLessThan function for
// details about how two servers get compared.
func SortServers(ids []raft.ServerID, s *State) {
sort.Slice(ids, func(i, j int) bool {
return ServerLessThan(ids[i], ids[j], s)
})
}
// ServerLessThan will lookup both servers in the given State and return
// true if the first id corresponds to a server that is logically less than
// (i.e. sorts before, ranks better than) the second server. The following criteria
// are considered in order of most important to least important
//
// 1. A Leader server is always less than all others
// 2. A voter is less than non voters
// 3. Healthy servers are less than unhealthy servers
// 4. Servers that have been stable longer are considered less than.
func ServerLessThan(id1 raft.ServerID, id2 raft.ServerID, s *State) bool {
srvI := s.Servers[id1]
srvJ := s.Servers[id2]
// the leader always comes first
if srvI.State == RaftLeader {
return true
} else if srvJ.State == RaftLeader {
return false
}
// voters come before non-voters & staging
if srvI.State == RaftVoter && srvJ.State != RaftVoter {
return true
} else if srvI.State != RaftVoter && srvJ.State == RaftVoter {
return false
}
// at this point we know that the raft state of both nodes is roughly
// equivalent so we want to now sort based on health
if srvI.Health.Healthy == srvJ.Health.Healthy {
if srvI.Health.StableSince.Before(srvJ.Health.StableSince) {
return srvI.Health.Healthy
} else if srvJ.Health.StableSince.Before(srvI.Health.StableSince) {
return !srvI.Health.Healthy
}
// with all else equal sort by the IDs
return id1 < id2
}
// one of the two isn't healthy. We consider the healthy one as less than
// the other. So we return true if server I is healthy and false if it isn't
// as in that case we know that server J is healthy and thus should come before server I.
return srvI.Health.Healthy
}
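
A small self-contained illustration of the resulting order, with invented server IDs: the leader sorts first, then voters, then non-voters, with health and stability breaking ties:

package main

import (
	"fmt"

	"github.com/hashicorp/raft"
	autopilot "github.com/hashicorp/raft-autopilot"
)

func main() {
	// Three servers: "a" leads, "b" is a healthy voter, "c" is a non-voter.
	state := &autopilot.State{
		Servers: map[raft.ServerID]*autopilot.ServerState{
			"a": {State: autopilot.RaftLeader, Health: autopilot.ServerHealth{Healthy: true}},
			"b": {State: autopilot.RaftVoter, Health: autopilot.ServerHealth{Healthy: true}},
			"c": {State: autopilot.RaftNonVoter, Health: autopilot.ServerHealth{Healthy: true}},
		},
	}
	ids := []raft.ServerID{"c", "b", "a"}
	autopilot.SortServers(ids, state)
	fmt.Println(ids) // [a b c]: leader, then voter, then non-voter
}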

vendor/github.com/hashicorp/raft-autopilot/types.go generated vendored Normal file

@ -0,0 +1,298 @@
package autopilot
import (
"context"
"time"
"github.com/hashicorp/raft"
)
//go:generate mockery -all -inpkg -case snake -testonly
// RaftState is the status of a single server in the Raft cluster.
type RaftState string
const (
RaftNone RaftState = "none"
RaftLeader RaftState = "leader"
RaftVoter RaftState = "voter"
RaftNonVoter RaftState = "non-voter"
RaftStaging RaftState = "staging"
)
func (s RaftState) IsPotentialVoter() bool {
switch s {
case RaftVoter, RaftStaging, RaftLeader:
return true
default:
return false
}
}
// NodeStatus represents the health of a server as known to the autopilot consumer.
// This should not take into account Raft health or the server being on a new enough
// term and index.
type NodeStatus string
const (
NodeUnknown NodeStatus = "unknown"
NodeAlive NodeStatus = "alive"
NodeFailed NodeStatus = "failed"
NodeLeft NodeStatus = "left"
)
type NodeType string
const (
NodeVoter NodeType = "voter"
)
// Config represents all the tunables of autopilot
type Config struct {
// CleanupDeadServers controls whether to remove dead servers when a new
// server is added to the Raft peers.
CleanupDeadServers bool
// LastContactThreshold is the limit on the amount of time a server can go
// without leader contact before being considered unhealthy.
LastContactThreshold time.Duration
// MaxTrailingLogs is the number of entries in the Raft Log that a server can
// be behind before being considered unhealthy.
MaxTrailingLogs uint64
// MinQuorum sets the minimum number of servers required in a cluster
// before autopilot can prune dead servers.
MinQuorum uint
// ServerStabilizationTime is the minimum amount of time a server must be
// in a stable, healthy state before it can be added to the cluster. Only
// applicable with Raft protocol version 3 or higher.
ServerStabilizationTime time.Duration
Ext interface{}
}
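
For reference, an illustrative literal with plausible values. These numbers are examples only, not library defaults; the consuming application (Vault persists and merges its own defaults) decides the real values:

package example

import (
	"time"

	autopilot "github.com/hashicorp/raft-autopilot"
)

// Example values only; nothing here is a default of this package.
var exampleConf = autopilot.Config{
	CleanupDeadServers:      true,
	LastContactThreshold:    10 * time.Second,
	MaxTrailingLogs:         1000,
	MinQuorum:               3,
	ServerStabilizationTime: 10 * time.Second,
}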
// Server represents one Raft server
type Server struct {
// This first set of fields are those that the autopilot delegate
// needs to fill in
ID raft.ServerID
Name string
Address raft.ServerAddress
NodeStatus NodeStatus
Version string
Meta map[string]string
RaftVersion int
// The remaining fields are those that the promoter
// will fill in
NodeType NodeType
Ext interface{}
}
type ServerState struct {
Server Server
State RaftState
Stats ServerStats
Health ServerHealth
}
func (s *ServerState) HasVotingRights() bool {
return s.State == RaftVoter || s.State == RaftLeader
}
// isHealthy determines whether this ServerState is considered healthy
// based on the given Autopilot config
func (s *ServerState) isHealthy(lastTerm uint64, leaderLastIndex uint64, conf *Config) bool {
if s.Server.NodeStatus != NodeAlive {
return false
}
if s.Stats.LastContact > conf.LastContactThreshold || s.Stats.LastContact < 0 {
return false
}
if s.Stats.LastTerm != lastTerm {
return false
}
if leaderLastIndex > conf.MaxTrailingLogs && s.Stats.LastIndex < leaderLastIndex-conf.MaxTrailingLogs {
return false
}
return true
}
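
A worked pass through these gates with invented numbers: a node that is alive, last heard from the leader 2s ago against a 10s threshold, matches the leader's term, and sits at index 950 against a leader index of 1000 with MaxTrailingLogs of 500 is healthy; an index of 300 would fail the final check. A runnable sketch of just that last gate:

package main

import "fmt"

func main() {
	leaderLastIndex, maxTrailingLogs := uint64(1000), uint64(500)
	for _, lastIndex := range []uint64{950, 300} {
		// mirrors: leaderLastIndex > MaxTrailingLogs && LastIndex < leaderLastIndex-MaxTrailingLogs
		trailingTooFar := leaderLastIndex > maxTrailingLogs && lastIndex < leaderLastIndex-maxTrailingLogs
		fmt.Printf("index %d healthy: %v\n", lastIndex, !trailingTooFar)
	}
	// prints: index 950 healthy: true, index 300 healthy: false
}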
type ServerHealth struct {
// Healthy is whether or not the server is healthy according to the current
// Autopilot config.
Healthy bool
// StableSince is the last time this server's Healthy value changed.
StableSince time.Time
}
// IsStable returns true if the ServerState shows a stable, passing state
// according to the given AutopilotConfig
func (h *ServerHealth) IsStable(now time.Time, minStableDuration time.Duration) bool {
if h == nil {
return false
}
if !h.Healthy {
return false
}
if now.Sub(h.StableSince) < minStableDuration {
return false
}
return true
}
// ServerStats holds miscellaneous Raft metrics for a server
type ServerStats struct {
// LastContact is the time since this node's last contact with the leader.
LastContact time.Duration
// LastTerm is the highest leader term this server has a record of in its Raft log.
LastTerm uint64
// LastIndex is the last log index this server has a record of in its Raft log.
LastIndex uint64
}
type State struct {
startTime time.Time
Healthy bool
FailureTolerance int
Servers map[raft.ServerID]*ServerState
Leader raft.ServerID
Voters []raft.ServerID
Ext interface{}
}
func (s *State) ServerStabilizationTime(c *Config) time.Duration {
// Only use the configured stabilization time when autopilot has
// been running for 110% of the configured stabilization time.
// Before that time we haven't been running long enough to
// be able to take these values into account. 110% is pretty
// arbitrary but with the default config would prevent the
// stabilization time from mattering for an extra second. This
// allows for leeway in how quickly we get the healthy RPC responses
// after autopilot is started.
if time.Since(s.startTime) > (c.ServerStabilizationTime*110)/100 {
return c.ServerStabilizationTime
}
// ignore stabilization time if autopilot hasn't been running long enough
// to be tracking any server long enough to meet that requirement
return 0
}
// Raft is the interface of all the methods on the Raft type that autopilot needs to function. Autopilot will
// take in an interface for Raft instead of a concrete type to allow for dependency injection in tests.
type Raft interface {
AddNonvoter(id raft.ServerID, address raft.ServerAddress, prevIndex uint64, timeout time.Duration) raft.IndexFuture
AddVoter(id raft.ServerID, address raft.ServerAddress, prevIndex uint64, timeout time.Duration) raft.IndexFuture
DemoteVoter(id raft.ServerID, prevIndex uint64, timeout time.Duration) raft.IndexFuture
LastIndex() uint64
Leader() raft.ServerAddress
GetConfiguration() raft.ConfigurationFuture
RemoveServer(id raft.ServerID, prevIndex uint64, timeout time.Duration) raft.IndexFuture
Stats() map[string]string
LeadershipTransferToServer(id raft.ServerID, address raft.ServerAddress) raft.Future
}
type ApplicationIntegration interface {
// AutopilotConfig is used to retrieve the latest configuration from the delegate
AutopilotConfig() *Config
// NotifyState will be called when the autopilot state is updated. The application may choose to emit metrics
// or perform other actions based on this information.
NotifyState(*State)
// FetchServerStats will be called to request the application fetch the ServerStats out of band. Usually this
// will require an RPC to each server.
FetchServerStats(context.Context, map[raft.ServerID]*Server) map[raft.ServerID]*ServerStats
// KnownServers fetches the list of servers as known to the application
KnownServers() map[raft.ServerID]*Server
// RemoveFailedServer notifies the application to forcefully remove the server in the failed state
// It is expected that this returns nearly immediately so if a longer running operation needs to be
// performed then the Delegate implementation should spawn a go routine itself.
RemoveFailedServer(*Server)
}
type RaftChanges struct {
Promotions []raft.ServerID
Demotions []raft.ServerID
Leader raft.ServerID
}
type FailedServers struct {
// StaleNonVoters are the ids of those servers in the raft configuration as non-voters
// that are not present in the delegate's view of what servers should be available
StaleNonVoters []raft.ServerID
// StaleVoters are the ids of those servers in the raft configuration as voters that
// are not present in the delegate's view of what servers should be available
StaleVoters []raft.ServerID
// FailedNonVoters are the servers without voting rights in the cluster that the
// delegate has indicated are in a failed state
FailedNonVoters []*Server
// FailedVoters are the servers with voting rights in the cluster that the
// delegate has indicated are in a failed state
FailedVoters []*Server
}
// Promoter is an interface to provide promotion/demotion algorithms to the core autopilot type.
// The StablePromoter satisfies this interface and will promote any stable servers but other
// algorithms could be implemented. The implementation of these methods shouldn't "block".
// While they are synchronous, autopilot expects the algorithms to not make any network
// or other requests which may cause an indefinite amount of waiting to occur.
//
// Note that all parameters passed to these functions should be considered read-only and
// their modification could result in undefined behavior of the core autopilot routines
// including potential crashes.
type Promoter interface {
// GetServerExt returns some object that should be stored in the Ext field of the Server
// This value will not be used by the code in this repo but may be used by the other
// Promoter methods and the application utilizing autopilot. If the value returned is
// nil the extended state will not be updated.
GetServerExt(*Config, *ServerState) interface{}
// GetStateExt returns some object that should be stored in the Ext field of the State
// This value will not be used by the code in this repo but may be used by the other
// Promoter methods and the application utilizing autopilot. If the value returned is
// nil the extended state will not be updated.
GetStateExt(*Config, *State) interface{}
// GetNodeTypes returns a map of ServerID to NodeType for all the servers which
// should have their NodeType field updated
GetNodeTypes(*Config, *State) map[raft.ServerID]NodeType
// CalculatePromotionsAndDemotions returns the promotions and demotions to be
// performed as well as the server id of the desired leader.
CalculatePromotionsAndDemotions(*Config, *State) RaftChanges
// FilterFailedServerRemovals takes in the current state and structure outlining all the
// failed/stale servers and will return those failed servers which the promoter thinks
// should be allowed to be removed.
FilterFailedServerRemovals(*Config, *State, *FailedServers) *FailedServers
}
// timeProvider is an interface for getting a local time. This is mainly useful for testing
// to inject certain times so that output validation is easier.
type timeProvider interface {
Now() time.Time
}
type runtimeTimeProvider struct{}
func (_ *runtimeTimeProvider) Now() time.Time {
return time.Now()
}
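
Because timeProvider is unexported, only the library's own tests can swap it out; a sketch of what such an in-package fake could look like (illustrative, not part of the shipped code):

package autopilot

import "time"

// fakeTimeProvider returns a fixed, manually advanced time so that health
// and stabilization decisions become deterministic in tests.
type fakeTimeProvider struct{ now time.Time }

func (f *fakeTimeProvider) Now() time.Time { return f.now }

// advance moves the fake clock forward, e.g. past a stabilization window.
func (f *fakeTimeProvider) advance(d time.Duration) { f.now = f.now.Add(d) }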

vendor/github.com/hashicorp/raft/CHANGELOG.md vendored

@ -3,6 +3,19 @@
IMPROVEMENTS
* Remove `StartAsLeader` configuration option [[GH-364](https://github.com/hashicorp/raft/pull/386)]
* Allow futures to react to `Shutdown()` to prevent a deadlock with `takeSnapshot()` [[GH-390](https://github.com/hashicorp/raft/pull/390)]
* Prevent non-voters from becoming eligible for leadership elections [[GH-398](https://github.com/hashicorp/raft/pull/398)]
* Remove an unneeded `io.Copy` from snapshot writes [[GH-399](https://github.com/hashicorp/raft/pull/399)]
* Log decoded candidate address in `duplicate requestVote` warning [[GH-400](https://github.com/hashicorp/raft/pull/400)]
* Prevent starting a TCP transport when IP address is `nil` [[GH-403](https://github.com/hashicorp/raft/pull/403)]
* Reject leadership transfer requests when in candidate state to prevent indefinite blocking while unable to elect a leader [[GH-413](https://github.com/hashicorp/raft/pull/413)]
* Add labels for metric metadata to reduce cardinality of metric names [[GH-409](https://github.com/hashicorp/raft/pull/409)]
* Add peers metric [[GH-431](https://github.com/hashicorp/raft/pull/431)]
BUG FIXES
* Make `LeaderCh` always deliver the latest leadership transition [[GH-384](https://github.com/hashicorp/raft/pull/384)]
* Handle updating an existing peer in `startStopReplication` [[GH-419](https://github.com/hashicorp/raft/pull/419)]
# 1.1.2 (January 17th, 2020)

api/sys_raft.go

@ -2,9 +2,16 @@ package api
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"time"
"github.com/hashicorp/vault/sdk/helper/parseutil"
"github.com/mitchellh/mapstructure"
"github.com/hashicorp/vault/sdk/helper/consts"
)
@ -27,6 +34,77 @@ type RaftJoinRequest struct {
NonVoter bool `json:"non_voter"`
}
// AutopilotConfig is used for querying/setting the Autopilot configuration.
type AutopilotConfig struct {
CleanupDeadServers bool `json:"cleanup_dead_servers" mapstructure:"cleanup_dead_servers"`
LastContactThreshold time.Duration `json:"last_contact_threshold" mapstructure:"-"`
DeadServerLastContactThreshold time.Duration `json:"dead_server_last_contact_threshold" mapstructure:"-"`
MaxTrailingLogs uint64 `json:"max_trailing_logs" mapstructure:"max_trailing_logs"`
MinQuorum uint `json:"min_quorum" mapstructure:"min_quorum"`
ServerStabilizationTime time.Duration `json:"server_stabilization_time" mapstructure:"-"`
}
// UnmarshalJSON parses the autopilot config JSON blob
func (ac *AutopilotConfig) UnmarshalJSON(b []byte) error {
var data interface{}
err := json.Unmarshal(b, &data)
if err != nil {
return err
}
conf := data.(map[string]interface{})
if err = mapstructure.WeakDecode(conf, ac); err != nil {
return err
}
if ac.LastContactThreshold, err = parseutil.ParseDurationSecond(conf["last_contact_threshold"]); err != nil {
return err
}
if ac.DeadServerLastContactThreshold, err = parseutil.ParseDurationSecond(conf["dead_server_last_contact_threshold"]); err != nil {
return err
}
if ac.ServerStabilizationTime, err = parseutil.ParseDurationSecond(conf["server_stabilization_time"]); err != nil {
return err
}
return nil
}
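
A hedged usage sketch of the decoder above: the duration fields may arrive as strings like "10s" or as integer seconds, which is why they bypass mapstructure and go through parseutil instead.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/hashicorp/vault/api"
)

func main() {
	blob := []byte(`{
		"cleanup_dead_servers": true,
		"last_contact_threshold": "10s",
		"dead_server_last_contact_threshold": "24h",
		"max_trailing_logs": 1000,
		"min_quorum": 3,
		"server_stabilization_time": "10s"
	}`)
	var conf api.AutopilotConfig
	if err := json.Unmarshal(blob, &conf); err != nil {
		panic(err)
	}
	fmt.Println(conf.LastContactThreshold, conf.DeadServerLastContactThreshold)
}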
// AutopilotExecutionStatus represents the current status of the autopilot background go routines
type AutopilotExecutionStatus string
const (
AutopilotNotRunning AutopilotExecutionStatus = "not-running"
AutopilotRunning AutopilotExecutionStatus = "running"
AutopilotShuttingDown AutopilotExecutionStatus = "shutting-down"
)
// AutopilotState represents the response of the raft autopilot state API
type AutopilotState struct {
ExecutionStatus AutopilotExecutionStatus `mapstructure:"execution_status"`
Healthy bool `mapstructure:"healthy"`
FailureTolerance int `mapstructure:"failure_tolerance"`
OptimisticFailureTolerance int `mapstructure:"optimistic_failure_tolerance"`
Servers map[string]*AutopilotServer `mapstructure:"servers"`
Leader string `mapstructure:"leader"`
Voters []string `mapstructure:"voters"`
NonVoters []string `mapstructure:"non_voters"`
}
// AutopilotServer represents the server blocks in the response of the raft
// autopilot state API.
type AutopilotServer struct {
ID string `mapstructure:"id"`
Name string `mapstructure:"name"`
Address string `mapstructure:"address"`
NodeStatus string `mapstructure:"node_status"`
LastContact string `mapstructure:"last_contact"`
LastTerm uint64 `mapstructure:"last_term"`
LastIndex uint64 `mapstructure:"last_index"`
Healthy bool `mapstructure:"healthy"`
StableSince string `mapstructure:"stable_since"`
Status string `mapstructure:"status"`
Meta map[string]string `mapstructure:"meta"`
}
// RaftJoin adds the node from which this call is invoked to the raft
// cluster represented by the leader address in the parameter.
func (c *Sys) RaftJoin(opts *RaftJoinRequest) (*RaftJoinResponse, error) {
@ -160,3 +238,79 @@ func (c *Sys) RaftSnapshotRestore(snapReader io.Reader, force bool) error {
return nil
}
// RaftAutopilotState returns the state of the raft cluster as seen by autopilot.
func (c *Sys) RaftAutopilotState() (*AutopilotState, error) {
r := c.c.NewRequest("GET", "/v1/sys/storage/raft/autopilot/state")
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
resp, err := c.c.RawRequestWithContext(ctx, r)
if resp != nil {
defer resp.Body.Close()
if resp.StatusCode == 404 {
return nil, nil
}
}
if err != nil {
return nil, err
}
secret, err := ParseSecret(resp.Body)
if err != nil {
return nil, err
}
if secret == nil || secret.Data == nil {
return nil, errors.New("data from server response is empty")
}
var result AutopilotState
err = mapstructure.Decode(secret.Data, &result)
if err != nil {
return nil, err
}
return &result, err
}
// RaftAutopilotConfiguration fetches the autopilot config.
func (c *Sys) RaftAutopilotConfiguration() (*AutopilotConfig, error) {
r := c.c.NewRequest("GET", "/v1/sys/storage/raft/autopilot/configuration")
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
resp, err := c.c.RawRequestWithContext(ctx, r)
if resp != nil {
defer resp.Body.Close()
if resp.StatusCode == 404 {
return nil, nil
}
}
if err != nil {
return nil, err
}
secret, err := ParseSecret(resp.Body)
if err != nil {
return nil, err
}
if secret == nil {
return nil, errors.New("data from server response is empty")
}
var result AutopilotConfig
if err = mapstructure.Decode(secret.Data, &result); err != nil {
return nil, err
}
if result.LastContactThreshold, err = parseutil.ParseDurationSecond(secret.Data["last_contact_threshold"]); err != nil {
return nil, err
}
if result.DeadServerLastContactThreshold, err = parseutil.ParseDurationSecond(secret.Data["dead_server_last_contact_threshold"]); err != nil {
return nil, err
}
if result.ServerStabilizationTime, err = parseutil.ParseDurationSecond(secret.Data["server_stabilization_time"]); err != nil {
return nil, err
}
return &result, err
}
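
And a sketch of calling the two helpers from a configured client. Note that both return (nil, nil) on a 404, so callers should check for a nil result as well as an error:

package main

import (
	"fmt"

	"github.com/hashicorp/vault/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	state, err := client.Sys().RaftAutopilotState()
	if err != nil {
		panic(err)
	}
	if state != nil {
		fmt.Println("healthy:", state.Healthy, "leader:", state.Leader)
	}

	conf, err := client.Sys().RaftAutopilotConfiguration()
	if err != nil {
		panic(err)
	}
	if conf != nil {
		fmt.Println("cleanup_dead_servers:", conf.CleanupDeadServers)
	}
}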

vendor/modules.txt vendored

@ -536,8 +536,10 @@ github.com/hashicorp/mdns
# github.com/hashicorp/nomad/api v0.0.0-20191220223628-edc62acd919d
github.com/hashicorp/nomad/api
github.com/hashicorp/nomad/api/contexts
# github.com/hashicorp/raft v1.1.3-0.20201002073007-f367681f9c48
# github.com/hashicorp/raft v1.2.0
github.com/hashicorp/raft
# github.com/hashicorp/raft-autopilot v0.1.2
github.com/hashicorp/raft-autopilot
# github.com/hashicorp/raft-snapshot v1.0.3
github.com/hashicorp/raft-snapshot
# github.com/hashicorp/serf v0.9.5