From 2ccf565bf6926e97076f9af8dfb0a0fa26f7c6dc Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Mon, 29 Jan 2018 19:53:34 -0800 Subject: [PATCH 1/3] Refactor redundancy_zone/upgrade_version out of client meta --- api/operator_autopilot.go | 128 +++++++++++------- api/operator_autopilot_test.go | 10 +- command/agent/agent.go | 14 +- command/agent/config-test-fixtures/basic.hcl | 6 +- command/agent/config.go | 17 ++- command/agent/config_parse.go | 13 +- command/agent/config_parse_test.go | 6 +- command/agent/config_test.go | 12 +- command/agent/operator_endpoint.go | 78 ++--------- command/agent/operator_endpoint_test.go | 52 +------ command/operator_autopilot_get.go | 4 +- command/operator_autopilot_set.go | 28 ++-- command/operator_autopilot_set_test.go | 4 +- nomad/autopilot.go | 29 +++- nomad/autopilot_test.go | 7 +- nomad/config.go | 12 +- nomad/fsm_test.go | 3 +- nomad/leader.go | 3 +- nomad/operator_endpoint.go | 2 +- nomad/server.go | 6 + nomad/state/autopilot.go | 16 +-- nomad/state/autopilot_test.go | 14 +- nomad/structs/config/autopilot.go | 26 ++-- nomad/structs/config/autopilot_test.go | 12 +- nomad/structs/operator.go | 41 +++++- nomad/util.go | 3 - website/source/api/operator.html.md | 23 ++-- .../agent/configuration/autopilot.html.md | 25 ++-- .../docs/agent/configuration/server.html.md | 13 +- .../source/guides/cluster/autopilot.html.md | 42 +++--- 30 files changed, 350 insertions(+), 299 deletions(-) diff --git a/api/operator_autopilot.go b/api/operator_autopilot.go index a61ad21d6..2dbde9dd2 100644 --- a/api/operator_autopilot.go +++ b/api/operator_autopilot.go @@ -2,6 +2,7 @@ package api import ( "bytes" + "encoding/json" "fmt" "io" "strconv" @@ -19,7 +20,7 @@ type AutopilotConfiguration struct { // LastContactThreshold is the limit on the amount of time a server can go // without leader contact before being considered unhealthy. 
- LastContactThreshold *ReadableDuration + LastContactThreshold time.Duration // MaxTrailingLogs is the amount of entries in the Raft Log that a server can // be behind before being considered unhealthy. @@ -28,20 +29,19 @@ type AutopilotConfiguration struct { // ServerStabilizationTime is the minimum amount of time a server must be // in a stable, healthy state before it can be added to the cluster. Only // applicable with Raft protocol version 3 or higher. - ServerStabilizationTime *ReadableDuration + ServerStabilizationTime time.Duration - // (Enterprise-only) RedundancyZoneTag is the node tag to use for separating - // servers into zones for redundancy. If left blank, this feature will be disabled. - RedundancyZoneTag string + // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. + EnableRedundancyZones bool // (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration // strategy of waiting until enough newer-versioned servers have been added to the // cluster before promoting them to voters. DisableUpgradeMigration bool - // (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when - // performing upgrade migrations. If left blank, the Nomad version will be used. - UpgradeVersionTag string + // (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom + // upgrade versions when performing migrations. + EnableCustomUpgrades bool // CreateIndex holds the index corresponding the creation of this configuration. // This is a read-only field. 
@@ -54,6 +54,45 @@ type AutopilotConfiguration struct { ModifyIndex uint64 } +func (u *AutopilotConfiguration) MarshalJSON() ([]byte, error) { + type Alias AutopilotConfiguration + return json.Marshal(&struct { + LastContactThreshold string + ServerStabilizationTime string + *Alias + }{ + LastContactThreshold: u.LastContactThreshold.String(), + ServerStabilizationTime: u.ServerStabilizationTime.String(), + Alias: (*Alias)(u), + }) +} + +func (u *AutopilotConfiguration) UnmarshalJSON(data []byte) error { + type Alias AutopilotConfiguration + aux := &struct { + LastContactThreshold string + ServerStabilizationTime string + *Alias + }{ + Alias: (*Alias)(u), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + var err error + if aux.LastContactThreshold != "" { + if u.LastContactThreshold, err = time.ParseDuration(aux.LastContactThreshold); err != nil { + return err + } + } + if aux.ServerStabilizationTime != "" { + if u.ServerStabilizationTime, err = time.ParseDuration(aux.ServerStabilizationTime); err != nil { + return err + } + } + return nil +} + // ServerHealth is the health (from the leader's point of view) of a server. type ServerHealth struct { // ID is the raft ID of the server. @@ -75,7 +114,7 @@ type ServerHealth struct { Leader bool // LastContact is the time since this node's last contact with the leader. - LastContact *ReadableDuration + LastContact time.Duration // LastTerm is the highest leader term this server has a record of in its Raft log. 
LastTerm uint64 @@ -94,6 +133,37 @@ type ServerHealth struct { StableSince time.Time } +func (u *ServerHealth) MarshalJSON() ([]byte, error) { + type Alias ServerHealth + return json.Marshal(&struct { + LastContact string + *Alias + }{ + LastContact: u.LastContact.String(), + Alias: (*Alias)(u), + }) +} + +func (u *ServerHealth) UnmarshalJSON(data []byte) error { + type Alias ServerHealth + aux := &struct { + LastContact string + *Alias + }{ + Alias: (*Alias)(u), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + var err error + if aux.LastContact != "" { + if u.LastContact, err = time.ParseDuration(aux.LastContact); err != nil { + return err + } + } + return nil +} + // OperatorHealthReply is a representation of the overall health of the cluster type OperatorHealthReply struct { // Healthy is true if all the servers in the cluster are healthy. @@ -107,46 +177,6 @@ type OperatorHealthReply struct { Servers []ServerHealth } -// ReadableDuration is a duration type that is serialized to JSON in human readable format. 
-type ReadableDuration time.Duration - -func NewReadableDuration(dur time.Duration) *ReadableDuration { - d := ReadableDuration(dur) - return &d -} - -func (d *ReadableDuration) String() string { - return d.Duration().String() -} - -func (d *ReadableDuration) Duration() time.Duration { - if d == nil { - return time.Duration(0) - } - return time.Duration(*d) -} - -func (d *ReadableDuration) MarshalJSON() ([]byte, error) { - return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil -} - -func (d *ReadableDuration) UnmarshalJSON(raw []byte) error { - if d == nil { - return fmt.Errorf("cannot unmarshal to nil pointer") - } - - str := string(raw) - if len(str) < 2 || str[0] != '"' || str[len(str)-1] != '"' { - return fmt.Errorf("must be enclosed with quotes: %s", str) - } - dur, err := time.ParseDuration(str[1 : len(str)-1]) - if err != nil { - return err - } - *d = ReadableDuration(dur) - return nil -} - // AutopilotGetConfiguration is used to query the current Autopilot configuration. 
func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) { r, err := op.c.newRequest("GET", "/v1/operator/autopilot/configuration") diff --git a/api/operator_autopilot_test.go b/api/operator_autopilot_test.go index 1c18e8e0f..c491d855c 100644 --- a/api/operator_autopilot_test.go +++ b/api/operator_autopilot_test.go @@ -17,13 +17,17 @@ func TestAPI_OperatorAutopilotGetSetConfiguration(t *testing.T) { defer s.Stop() operator := c.Operator() - config, err := operator.AutopilotGetConfiguration(nil) - assert.Nil(err) + var config *AutopilotConfiguration + retry.Run(t, func(r *retry.R) { + var err error + config, err = operator.AutopilotGetConfiguration(nil) + r.Check(err) + }) assert.True(config.CleanupDeadServers) // Change a config setting newConf := &AutopilotConfiguration{CleanupDeadServers: false} - err = operator.AutopilotSetConfiguration(newConf, nil) + err := operator.AutopilotSetConfiguration(newConf, nil) assert.Nil(err) config, err = operator.AutopilotGetConfiguration(nil) diff --git a/command/agent/agent.go b/command/agent/agent.go index 43e42c1bb..eff225a3b 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -163,6 +163,12 @@ func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Confi if agentConfig.Server.NonVotingServer { conf.NonVoter = true } + if agentConfig.Server.RedundancyZone != "" { + conf.RedundancyZone = agentConfig.Server.RedundancyZone + } + if agentConfig.Server.UpgradeVersion != "" { + conf.UpgradeVersion = agentConfig.Server.UpgradeVersion + } if agentConfig.Autopilot != nil { if agentConfig.Autopilot.CleanupDeadServers != nil { conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers @@ -176,14 +182,14 @@ func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Confi if agentConfig.Autopilot.MaxTrailingLogs != 0 { conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) } - if 
agentConfig.Autopilot.RedundancyZoneTag != "" { - conf.AutopilotConfig.RedundancyZoneTag = agentConfig.Autopilot.RedundancyZoneTag + if agentConfig.Autopilot.EnableRedundancyZones != nil { + conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones } if agentConfig.Autopilot.DisableUpgradeMigration != nil { conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration } - if agentConfig.Autopilot.UpgradeVersionTag != "" { - conf.AutopilotConfig.UpgradeVersionTag = agentConfig.Autopilot.UpgradeVersionTag + if agentConfig.Autopilot.EnableCustomUpgrades != nil { + conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades } } diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 5cf8603e7..4ddc65771 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -83,6 +83,8 @@ server { retry_interval = "15s" rejoin_after_leave = true non_voting_server = true + redundancy_zone = "foo" + upgrade_version = "0.8.0" encrypt = "abc" } acl { @@ -166,7 +168,7 @@ autopilot { disable_upgrade_migration = true last_contact_threshold = "12705s" max_trailing_logs = 17849 - redundancy_zone_tag = "foo" + enable_redundancy_zones = true server_stabilization_time = "23057s" - upgrade_version_tag = "bar" + enable_custom_upgrades = true } diff --git a/command/agent/config.go b/command/agent/config.go index 6cff6c378..6eb1fad06 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -330,10 +330,17 @@ type ServerConfig struct { // true, we ignore the leave, and rejoin the cluster on start. RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"` - // NonVotingServer is whether this server will act as a non-voting member - // of the cluster to help provide read scalability. 
(Enterprise-only) + // (Enterprise-only) NonVotingServer is whether this server will act as a + // non-voting member of the cluster to help provide read scalability. NonVotingServer bool `mapstructure:"non_voting_server"` + // (Enterprise-only) RedundancyZone is the redundancy zone to use for this server. + RedundancyZone string `mapstructure:"redundancy_zone"` + + // (Enterprise-only) UpgradeVersion is the custom upgrade version to use when + // performing upgrade migrations. + UpgradeVersion string `mapstructure:"upgrade_version"` + // Encryption key to use for the Serf communication EncryptKey string `mapstructure:"encrypt" json:"-"` } @@ -1034,6 +1041,12 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.NonVotingServer { result.NonVotingServer = true } + if b.RedundancyZone != "" { + result.RedundancyZone = b.RedundancyZone + } + if b.UpgradeVersion != "" { + result.UpgradeVersion = b.UpgradeVersion + } if b.EncryptKey != "" { result.EncryptKey = b.EncryptKey } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index e860a68af..6feaa6d04 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -9,6 +9,7 @@ import ( "time" multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-version" "github.com/hashicorp/hcl" "github.com/hashicorp/hcl/hcl/ast" "github.com/hashicorp/nomad/helper" @@ -536,6 +537,8 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { "encrypt", "authoritative_region", "non_voting_server", + "redundancy_zone", + "upgrade_version", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { return err @@ -559,6 +562,12 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { return err } + if config.UpgradeVersion != "" { + if _, err := version.NewVersion(config.UpgradeVersion); err != nil { + return fmt.Errorf("error parsing upgrade_version: %v", err) + } + } + *result = &config return nil } @@ -865,9 +874,9 @@ func 
parseAutopilot(result **config.AutopilotConfig, list *ast.ObjectList) error "server_stabilization_time", "last_contact_threshold", "max_trailing_logs", - "redundancy_zone_tag", + "enable_redundancy_zones", "disable_upgrade_migration", - "upgrade_version_tag", + "enable_custom_upgrades", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index c28989d9f..826bceb5f 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -104,6 +104,8 @@ func TestConfig_Parse(t *testing.T) { RejoinAfterLeave: true, RetryMaxAttempts: 3, NonVotingServer: true, + RedundancyZone: "foo", + UpgradeVersion: "0.8.0", EncryptKey: "abc", }, ACL: &ACLConfig{ @@ -193,9 +195,9 @@ func TestConfig_Parse(t *testing.T) { ServerStabilizationTime: 23057 * time.Second, LastContactThreshold: 12705 * time.Second, MaxTrailingLogs: 17849, - RedundancyZoneTag: "foo", + EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &trueValue, - UpgradeVersionTag: "bar", + EnableCustomUpgrades: &trueValue, }, }, false, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 400b57615..3ed05bd46 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -107,6 +107,8 @@ func TestConfig_Merge(t *testing.T) { HeartbeatGrace: 30 * time.Second, MinHeartbeatTTL: 30 * time.Second, MaxHeartbeatsPerSecond: 30.0, + RedundancyZone: "foo", + UpgradeVersion: "foo", }, ACL: &ACLConfig{ Enabled: true, @@ -165,9 +167,9 @@ func TestConfig_Merge(t *testing.T) { ServerStabilizationTime: 1 * time.Second, LastContactThreshold: 1 * time.Second, MaxTrailingLogs: 1, - RedundancyZoneTag: "1", + EnableRedundancyZones: &falseValue, DisableUpgradeMigration: &falseValue, - UpgradeVersionTag: "1", + EnableCustomUpgrades: &falseValue, }, } @@ -260,6 +262,8 @@ func TestConfig_Merge(t *testing.T) { RetryInterval: "10s", retryInterval: time.Second * 10, NonVotingServer: 
true, + RedundancyZone: "bar", + UpgradeVersion: "bar", }, ACL: &ACLConfig{ Enabled: true, @@ -328,9 +332,9 @@ func TestConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, - RedundancyZoneTag: "2", + EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &trueValue, - UpgradeVersionTag: "2", + EnableCustomUpgrades: &trueValue, }, } diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go index 93db317a2..58f293375 100644 --- a/command/agent/operator_endpoint.go +++ b/command/agent/operator_endpoint.go @@ -104,19 +104,19 @@ func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, re return nil, nil } - var reply autopilot.Config + var reply structs.AutopilotConfig if err := s.agent.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil { return nil, err } out := api.AutopilotConfiguration{ CleanupDeadServers: reply.CleanupDeadServers, - LastContactThreshold: api.NewReadableDuration(reply.LastContactThreshold), + LastContactThreshold: reply.LastContactThreshold, MaxTrailingLogs: reply.MaxTrailingLogs, - ServerStabilizationTime: api.NewReadableDuration(reply.ServerStabilizationTime), - RedundancyZoneTag: reply.RedundancyZoneTag, + ServerStabilizationTime: reply.ServerStabilizationTime, + EnableRedundancyZones: reply.EnableRedundancyZones, DisableUpgradeMigration: reply.DisableUpgradeMigration, - UpgradeVersionTag: reply.UpgradeVersionTag, + EnableCustomUpgrades: reply.EnableCustomUpgrades, CreateIndex: reply.CreateIndex, ModifyIndex: reply.ModifyIndex, } @@ -129,21 +129,20 @@ func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, re s.parseToken(req, &args.AuthToken) var conf api.AutopilotConfiguration - durations := NewDurationFixer("lastcontactthreshold", "serverstabilizationtime") - if err := decodeBodyFunc(req, &conf, durations.FixupDurations); err != nil { + if err := decodeBody(req, &conf); err 
!= nil { resp.WriteHeader(http.StatusBadRequest) fmt.Fprintf(resp, "Error parsing autopilot config: %v", err) return nil, nil } - args.Config = autopilot.Config{ + args.Config = structs.AutopilotConfig{ CleanupDeadServers: conf.CleanupDeadServers, - LastContactThreshold: conf.LastContactThreshold.Duration(), + LastContactThreshold: conf.LastContactThreshold, MaxTrailingLogs: conf.MaxTrailingLogs, - ServerStabilizationTime: conf.ServerStabilizationTime.Duration(), - RedundancyZoneTag: conf.RedundancyZoneTag, + ServerStabilizationTime: conf.ServerStabilizationTime, + EnableRedundancyZones: conf.EnableRedundancyZones, DisableUpgradeMigration: conf.DisableUpgradeMigration, - UpgradeVersionTag: conf.UpgradeVersionTag, + EnableCustomUpgrades: conf.EnableCustomUpgrades, } // Check for cas value @@ -210,7 +209,7 @@ func (s *HTTPServer) OperatorServerHealth(resp http.ResponseWriter, req *http.Re Version: server.Version, Leader: server.Leader, SerfStatus: server.SerfStatus.String(), - LastContact: api.NewReadableDuration(server.LastContact), + LastContact: server.LastContact, LastTerm: server.LastTerm, LastIndex: server.LastIndex, Healthy: server.Healthy, @@ -221,56 +220,3 @@ func (s *HTTPServer) OperatorServerHealth(resp http.ResponseWriter, req *http.Re return out, nil } - -type durationFixer map[string]bool - -func NewDurationFixer(fields ...string) durationFixer { - d := make(map[string]bool) - for _, field := range fields { - d[field] = true - } - return d -} - -// FixupDurations is used to handle parsing any field names in the map to time.Durations -func (d durationFixer) FixupDurations(raw interface{}) error { - rawMap, ok := raw.(map[string]interface{}) - if !ok { - return nil - } - for key, val := range rawMap { - switch val.(type) { - case map[string]interface{}: - if err := d.FixupDurations(val); err != nil { - return err - } - - case []interface{}: - for _, v := range val.([]interface{}) { - if err := d.FixupDurations(v); err != nil { - return err - } - } - - 
case []map[string]interface{}: - for _, v := range val.([]map[string]interface{}) { - if err := d.FixupDurations(v); err != nil { - return err - } - } - - default: - if d[strings.ToLower(key)] { - // Convert a string value into an integer - if vStr, ok := val.(string); ok { - dur, err := time.ParseDuration(vStr) - if err != nil { - return err - } - rawMap[key] = dur - } - } - } - } - return nil -} diff --git a/command/agent/operator_endpoint_test.go b/command/agent/operator_endpoint_test.go index 10ee36821..2d8486765 100644 --- a/command/agent/operator_endpoint_test.go +++ b/command/agent/operator_endpoint_test.go @@ -9,7 +9,6 @@ import ( "testing" "time" - "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/consul/testutil/retry" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/structs" @@ -112,7 +111,7 @@ func TestOperator_AutopilotSetConfiguration(t *testing.T) { t.Fatalf("err: %v", err) } if resp.Code != 200 { - t.Fatalf("bad code: %d", resp.Code) + t.Fatalf("bad code: %d, %q", resp.Code, resp.Body.String()) } args := structs.GenericRequest{ @@ -121,7 +120,7 @@ func TestOperator_AutopilotSetConfiguration(t *testing.T) { }, } - var reply autopilot.Config + var reply structs.AutopilotConfig if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil { t.Fatalf("err: %v", err) } @@ -150,7 +149,7 @@ func TestOperator_AutopilotCASConfiguration(t *testing.T) { }, } - var reply autopilot.Config + var reply structs.AutopilotConfig if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil { t.Fatalf("err: %v", err) } @@ -200,7 +199,6 @@ func TestOperator_AutopilotCASConfiguration(t *testing.T) { } func TestOperator_ServerHealth(t *testing.T) { - t.Parallel() httpTest(t, func(c *Config) { c.Server.RaftProtocol = 3 }, func(s *TestAgent) { @@ -259,47 +257,3 @@ func TestOperator_ServerHealth_Unhealthy(t *testing.T) { }) }) } - -func TestDurationFixer(t *testing.T) { - assert := 
assert.New(t) - obj := map[string]interface{}{ - "key1": []map[string]interface{}{ - { - "subkey1": "10s", - }, - { - "subkey2": "5d", - }, - }, - "key2": map[string]interface{}{ - "subkey3": "30s", - "subkey4": "20m", - }, - "key3": "11s", - "key4": "49h", - } - expected := map[string]interface{}{ - "key1": []map[string]interface{}{ - { - "subkey1": 10 * time.Second, - }, - { - "subkey2": "5d", - }, - }, - "key2": map[string]interface{}{ - "subkey3": "30s", - "subkey4": 20 * time.Minute, - }, - "key3": "11s", - "key4": 49 * time.Hour, - } - - fixer := NewDurationFixer("key4", "subkey1", "subkey4") - if err := fixer.FixupDurations(obj); err != nil { - t.Fatal(err) - } - - // Ensure we only processed the intended fieldnames - assert.Equal(obj, expected) -} diff --git a/command/operator_autopilot_get.go b/command/operator_autopilot_get.go index b533b4749..c85c4e5c4 100644 --- a/command/operator_autopilot_get.go +++ b/command/operator_autopilot_get.go @@ -45,9 +45,9 @@ func (c *OperatorAutopilotGetCommand) Run(args []string) int { c.Ui.Output(fmt.Sprintf("LastContactThreshold = %v", config.LastContactThreshold.String())) c.Ui.Output(fmt.Sprintf("MaxTrailingLogs = %v", config.MaxTrailingLogs)) c.Ui.Output(fmt.Sprintf("ServerStabilizationTime = %v", config.ServerStabilizationTime.String())) - c.Ui.Output(fmt.Sprintf("RedundancyZoneTag = %q", config.RedundancyZoneTag)) + c.Ui.Output(fmt.Sprintf("EnableRedundancyZones = %v", config.EnableRedundancyZones)) c.Ui.Output(fmt.Sprintf("DisableUpgradeMigration = %v", config.DisableUpgradeMigration)) - c.Ui.Output(fmt.Sprintf("UpgradeVersionTag = %q", config.UpgradeVersionTag)) + c.Ui.Output(fmt.Sprintf("EnableCustomUpgrades = %v", config.EnableCustomUpgrades)) return 0 } diff --git a/command/operator_autopilot_set.go b/command/operator_autopilot_set.go index bacefe339..280a0413e 100644 --- a/command/operator_autopilot_set.go +++ b/command/operator_autopilot_set.go @@ -3,10 +3,8 @@ package command import ( "fmt" "strings" - 
"time" "github.com/hashicorp/consul/command/flags" - "github.com/hashicorp/nomad/api" "github.com/posener/complete" ) @@ -21,9 +19,9 @@ func (c *OperatorAutopilotSetCommand) AutocompleteFlags() complete.Flags { "-max-trailing-logs": complete.PredictAnything, "-last-contact-threshold": complete.PredictAnything, "-server-stabilization-time": complete.PredictAnything, - "-redundancy-zone-tag": complete.PredictAnything, + "-enable-redundancy-zones": complete.PredictAnything, "-disable-upgrade-migration": complete.PredictAnything, - "-upgrade-version-tag": complete.PredictAnything, + "-enable-custom-upgrades": complete.PredictAnything, }) } @@ -36,9 +34,9 @@ func (c *OperatorAutopilotSetCommand) Run(args []string) int { var maxTrailingLogs flags.UintValue var lastContactThreshold flags.DurationValue var serverStabilizationTime flags.DurationValue - var redundancyZoneTag flags.StringValue + var enableRedundancyZones flags.BoolValue var disableUpgradeMigration flags.BoolValue - var upgradeVersionTag flags.StringValue + var enableCustomUpgrades flags.BoolValue f := c.Meta.FlagSet("autopilot", FlagSetClient) f.Usage = func() { c.Ui.Output(c.Help()) } @@ -47,9 +45,9 @@ func (c *OperatorAutopilotSetCommand) Run(args []string) int { f.Var(&maxTrailingLogs, "max-trailing-logs", "") f.Var(&lastContactThreshold, "last-contact-threshold", "") f.Var(&serverStabilizationTime, "server-stabilization-time", "") - f.Var(&redundancyZoneTag, "redundancy-zone-tag", "") + f.Var(&enableRedundancyZones, "enable-redundancy-zones", "") f.Var(&disableUpgradeMigration, "disable-upgrade-migration", "") - f.Var(&upgradeVersionTag, "upgrade-version-tag", "") + f.Var(&enableCustomUpgrades, "enable-custom-upgrades", "") if err := f.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err)) @@ -73,21 +71,15 @@ func (c *OperatorAutopilotSetCommand) Run(args []string) int { // Update the config values based on the set flags. 
cleanupDeadServers.Merge(&conf.CleanupDeadServers) - redundancyZoneTag.Merge(&conf.RedundancyZoneTag) + enableRedundancyZones.Merge(&conf.EnableRedundancyZones) disableUpgradeMigration.Merge(&conf.DisableUpgradeMigration) - upgradeVersionTag.Merge(&conf.UpgradeVersionTag) + enableCustomUpgrades.Merge(&conf.EnableCustomUpgrades) trailing := uint(conf.MaxTrailingLogs) maxTrailingLogs.Merge(&trailing) conf.MaxTrailingLogs = uint64(trailing) - - last := time.Duration(*conf.LastContactThreshold) - lastContactThreshold.Merge(&last) - conf.LastContactThreshold = api.NewReadableDuration(last) - - stablization := time.Duration(*conf.ServerStabilizationTime) - serverStabilizationTime.Merge(&stablization) - conf.ServerStabilizationTime = api.NewReadableDuration(stablization) + lastContactThreshold.Merge(&conf.LastContactThreshold) + serverStabilizationTime.Merge(&conf.ServerStabilizationTime) // Check-and-set the new configuration. result, err := operator.AutopilotCASConfiguration(conf, nil) diff --git a/command/operator_autopilot_set_test.go b/command/operator_autopilot_set_test.go index 8991ce51e..f76f75b5d 100644 --- a/command/operator_autopilot_set_test.go +++ b/command/operator_autopilot_set_test.go @@ -53,10 +53,10 @@ func TestOperatorAutopilotSetConfigCommmand(t *testing.T) { if conf.MaxTrailingLogs != 99 { t.Fatalf("bad: %#v", conf) } - if conf.LastContactThreshold.Duration() != 123*time.Millisecond { + if conf.LastContactThreshold != 123*time.Millisecond { t.Fatalf("bad: %#v", conf) } - if conf.ServerStabilizationTime.Duration() != 123*time.Millisecond { + if conf.ServerStabilizationTime != 123*time.Millisecond { t.Fatalf("bad: %#v", conf) } } diff --git a/nomad/autopilot.go b/nomad/autopilot.go index 5fd2a9b37..4c5ea180b 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -10,13 +10,40 @@ import ( "github.com/hashicorp/serf/serf" ) +const ( + AutopilotRZTag = "ap_zone" + AutopilotVersionTag = "ap_version" +) + // AutopilotDelegate is a Nomad delegate for 
autopilot operations. type AutopilotDelegate struct { server *Server } func (d *AutopilotDelegate) AutopilotConfig() *autopilot.Config { - return d.server.getOrCreateAutopilotConfig() + c := d.server.getOrCreateAutopilotConfig() + if c == nil { + return nil + } + + conf := &autopilot.Config{ + CleanupDeadServers: c.CleanupDeadServers, + LastContactThreshold: c.LastContactThreshold, + MaxTrailingLogs: c.MaxTrailingLogs, + ServerStabilizationTime: c.ServerStabilizationTime, + DisableUpgradeMigration: c.DisableUpgradeMigration, + ModifyIndex: c.ModifyIndex, + CreateIndex: c.CreateIndex, + } + + if c.EnableRedundancyZones { + conf.RedundancyZoneTag = AutopilotRZTag + } + if c.EnableCustomUpgrades { + conf.UpgradeVersionTag = AutopilotVersionTag + } + + return conf } func (d *AutopilotDelegate) FetchStats(ctx context.Context, servers []serf.Member) map[string]*autopilot.ServerStats { diff --git a/nomad/autopilot_test.go b/nomad/autopilot_test.go index 6511c8be9..13cee5044 100644 --- a/nomad/autopilot_test.go +++ b/nomad/autopilot_test.go @@ -270,8 +270,11 @@ func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Add s4 to peers directly - addr := fmt.Sprintf("127.0.0.1:%d", s4.config.SerfConfig.MemberlistConfig.BindPort) - s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(addr), 0, 0) + addr := fmt.Sprintf("127.0.0.1:%d", s4.config.RPCAddr.Port) + future := s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(addr), 0, 0) + if err := future.Error(); err != nil { + t.Fatal(err) + } // Verify we have 4 peers peers, err := s1.numPeers() diff --git a/nomad/config.go b/nomad/config.go index 5fd8dad8e..a2c553eb2 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -8,7 +8,6 @@ import ( "runtime" "time" - "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/memberlist" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/helper/uuid" @@ -98,6 +97,13 @@ type 
Config struct { // as a voting member of the Raft cluster. NonVoter bool + // (Enterprise-only) RedundancyZone is the redundancy zone to use for this server. + RedundancyZone string + + // (Enterprise-only) UpgradeVersion is the custom upgrade version to use when + // performing upgrade migrations. + UpgradeVersion string + // SerfConfig is the configuration for the serf cluster SerfConfig *serf.Config @@ -269,7 +275,7 @@ type Config struct { // AutopilotConfig is used to apply the initial autopilot config when // bootstrapping. - AutopilotConfig *autopilot.Config + AutopilotConfig *structs.AutopilotConfig // ServerHealthInterval is the frequency with which the health of the // servers in the cluster will be updated. @@ -339,7 +345,7 @@ func DefaultConfig() *Config { TLSConfig: &config.TLSConfig{}, ReplicationBackoff: 30 * time.Second, SentinelGCInterval: 30 * time.Second, - AutopilotConfig: &autopilot.Config{ + AutopilotConfig: &structs.AutopilotConfig{ CleanupDeadServers: true, LastContactThreshold: 200 * time.Millisecond, MaxTrailingLogs: 250, diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 90c0b6c12..aa16be373 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -10,7 +10,6 @@ import ( "time" "github.com/google/go-cmp/cmp" - "github.com/hashicorp/consul/agent/consul/autopilot" memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/mock" @@ -2319,7 +2318,7 @@ func TestFSM_Autopilot(t *testing.T) { // Set the autopilot config using a request. 
req := structs.AutopilotSetConfigRequest{ Datacenter: "dc1", - Config: autopilot.Config{ + Config: structs.AutopilotConfig{ CleanupDeadServers: true, LastContactThreshold: 10 * time.Second, MaxTrailingLogs: 300, diff --git a/nomad/leader.go b/nomad/leader.go index 712fa75b9..4d1a571cc 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -13,7 +13,6 @@ import ( "golang.org/x/time/rate" "github.com/armon/go-metrics" - "github.com/hashicorp/consul/agent/consul/autopilot" memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/helper/uuid" @@ -1174,7 +1173,7 @@ func diffACLTokens(state *state.StateStore, minIndex uint64, remoteList []*struc } // getOrCreateAutopilotConfig is used to get the autopilot config, initializing it if necessary -func (s *Server) getOrCreateAutopilotConfig() *autopilot.Config { +func (s *Server) getOrCreateAutopilotConfig() *structs.AutopilotConfig { state := s.fsm.State() _, config, err := state.AutopilotConfig() if err != nil { diff --git a/nomad/operator_endpoint.go b/nomad/operator_endpoint.go index b0a54d700..e6c992132 100644 --- a/nomad/operator_endpoint.go +++ b/nomad/operator_endpoint.go @@ -192,7 +192,7 @@ REMOVE: } // AutopilotGetConfiguration is used to retrieve the current Autopilot configuration. 
-func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *autopilot.Config) error { +func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *structs.AutopilotConfig) error { if done, err := op.srv.forward("Operator.AutopilotGetConfiguration", args, args, reply); done { return err } diff --git a/nomad/server.go b/nomad/server.go index d3faa60f5..09b7964c7 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1105,6 +1105,12 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) ( if s.config.NonVoter { conf.Tags["nonvoter"] = "1" } + if s.config.RedundancyZone != "" { + conf.Tags[AutopilotRZTag] = s.config.RedundancyZone + } + if s.config.UpgradeVersion != "" { + conf.Tags[AutopilotVersionTag] = s.config.UpgradeVersion + } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch diff --git a/nomad/state/autopilot.go b/nomad/state/autopilot.go index 65654ca79..83613817d 100644 --- a/nomad/state/autopilot.go +++ b/nomad/state/autopilot.go @@ -3,8 +3,8 @@ package state import ( "fmt" - "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/structs" ) // autopilotConfigTableSchema returns a new table schema used for storing @@ -26,7 +26,7 @@ func autopilotConfigTableSchema() *memdb.TableSchema { } // AutopilotConfig is used to get the current Autopilot configuration. 
-func (s *StateStore) AutopilotConfig() (uint64, *autopilot.Config, error) { +func (s *StateStore) AutopilotConfig() (uint64, *structs.AutopilotConfig, error) { tx := s.db.Txn(false) defer tx.Abort() @@ -36,7 +36,7 @@ func (s *StateStore) AutopilotConfig() (uint64, *autopilot.Config, error) { return 0, nil, fmt.Errorf("failed autopilot config lookup: %s", err) } - config, ok := c.(*autopilot.Config) + config, ok := c.(*structs.AutopilotConfig) if !ok { return 0, nil, nil } @@ -45,7 +45,7 @@ func (s *StateStore) AutopilotConfig() (uint64, *autopilot.Config, error) { } // AutopilotSetConfig is used to set the current Autopilot configuration. -func (s *StateStore) AutopilotSetConfig(idx uint64, config *autopilot.Config) error { +func (s *StateStore) AutopilotSetConfig(idx uint64, config *structs.AutopilotConfig) error { tx := s.db.Txn(true) defer tx.Abort() @@ -58,7 +58,7 @@ func (s *StateStore) AutopilotSetConfig(idx uint64, config *autopilot.Config) er // AutopilotCASConfig is used to try updating the Autopilot configuration with a // given Raft index. If the CAS index specified is not equal to the last observed index // for the config, then the call is a noop, -func (s *StateStore) AutopilotCASConfig(idx, cidx uint64, config *autopilot.Config) (bool, error) { +func (s *StateStore) AutopilotCASConfig(idx, cidx uint64, config *structs.AutopilotConfig) (bool, error) { tx := s.db.Txn(true) defer tx.Abort() @@ -71,7 +71,7 @@ func (s *StateStore) AutopilotCASConfig(idx, cidx uint64, config *autopilot.Conf // If the existing index does not match the provided CAS // index arg, then we shouldn't update anything and can safely // return early here. 
- e, ok := existing.(*autopilot.Config) + e, ok := existing.(*structs.AutopilotConfig) if !ok || e.ModifyIndex != cidx { return false, nil } @@ -82,7 +82,7 @@ func (s *StateStore) AutopilotCASConfig(idx, cidx uint64, config *autopilot.Conf return true, nil } -func (s *StateStore) autopilotSetConfigTxn(idx uint64, tx *memdb.Txn, config *autopilot.Config) error { +func (s *StateStore) autopilotSetConfigTxn(idx uint64, tx *memdb.Txn, config *structs.AutopilotConfig) error { // Check for an existing config existing, err := tx.First("autopilot-config", "id") if err != nil { @@ -91,7 +91,7 @@ func (s *StateStore) autopilotSetConfigTxn(idx uint64, tx *memdb.Txn, config *au // Set the indexes. if existing != nil { - config.CreateIndex = existing.(*autopilot.Config).CreateIndex + config.CreateIndex = existing.(*structs.AutopilotConfig).CreateIndex } else { config.CreateIndex = idx } diff --git a/nomad/state/autopilot_test.go b/nomad/state/autopilot_test.go index 59bf7b417..a43ddebe8 100644 --- a/nomad/state/autopilot_test.go +++ b/nomad/state/autopilot_test.go @@ -5,20 +5,20 @@ import ( "testing" "time" - "github.com/hashicorp/consul/agent/consul/autopilot" + "github.com/hashicorp/nomad/nomad/structs" ) func TestStateStore_Autopilot(t *testing.T) { s := testStateStore(t) - expected := &autopilot.Config{ + expected := &structs.AutopilotConfig{ CleanupDeadServers: true, LastContactThreshold: 5 * time.Second, MaxTrailingLogs: 500, ServerStabilizationTime: 100 * time.Second, - RedundancyZoneTag: "az", + EnableRedundancyZones: true, DisableUpgradeMigration: true, - UpgradeVersionTag: "build", + EnableCustomUpgrades: true, } if err := s.AutopilotSetConfig(0, expected); err != nil { @@ -40,7 +40,7 @@ func TestStateStore_Autopilot(t *testing.T) { func TestStateStore_AutopilotCAS(t *testing.T) { s := testStateStore(t) - expected := &autopilot.Config{ + expected := &structs.AutopilotConfig{ CleanupDeadServers: true, } @@ -52,7 +52,7 @@ func TestStateStore_AutopilotCAS(t *testing.T) { 
} // Do a CAS with an index lower than the entry - ok, err := s.AutopilotCASConfig(2, 0, &autopilot.Config{ + ok, err := s.AutopilotCASConfig(2, 0, &structs.AutopilotConfig{ CleanupDeadServers: false, }) if ok || err != nil { @@ -73,7 +73,7 @@ func TestStateStore_AutopilotCAS(t *testing.T) { } // Do another CAS, this time with the correct index - ok, err = s.AutopilotCASConfig(2, 1, &autopilot.Config{ + ok, err = s.AutopilotCASConfig(2, 1, &structs.AutopilotConfig{ CleanupDeadServers: false, }) if !ok || err != nil { diff --git a/nomad/structs/config/autopilot.go b/nomad/structs/config/autopilot.go index b1501b82f..ffa52bc09 100644 --- a/nomad/structs/config/autopilot.go +++ b/nomad/structs/config/autopilot.go @@ -24,25 +24,23 @@ type AutopilotConfig struct { // be behind before being considered unhealthy. MaxTrailingLogs int `mapstructure:"max_trailing_logs"` - // (Enterprise-only) RedundancyZoneTag is the node tag to use for separating - // servers into zones for redundancy. If left blank, this feature will be disabled. - RedundancyZoneTag string `mapstructure:"redundancy_zone_tag"` + // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. + EnableRedundancyZones *bool `mapstructure:"enable_redundancy_zones"` // (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration // strategy of waiting until enough newer-versioned servers have been added to the // cluster before promoting them to voters. DisableUpgradeMigration *bool `mapstructure:"disable_upgrade_migration"` - // (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when - // performing upgrade migrations. If left blank, the Nomad version will be used. - UpgradeVersionTag string `mapstructure:"upgrade_version_tag"` + // (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom + // upgrade versions when performing migrations. 
+ EnableCustomUpgrades *bool `mapstructure:"enable_custom_upgrades"` } // DefaultAutopilotConfig() returns the canonical defaults for the Nomad // `autopilot` configuration. func DefaultAutopilotConfig() *AutopilotConfig { return &AutopilotConfig{ - CleanupDeadServers: helper.BoolToPtr(true), LastContactThreshold: 200 * time.Millisecond, MaxTrailingLogs: 250, ServerStabilizationTime: 10 * time.Second, @@ -64,14 +62,14 @@ func (a *AutopilotConfig) Merge(b *AutopilotConfig) *AutopilotConfig { if b.MaxTrailingLogs != 0 { result.MaxTrailingLogs = b.MaxTrailingLogs } - if b.RedundancyZoneTag != "" { - result.RedundancyZoneTag = b.RedundancyZoneTag + if b.EnableRedundancyZones != nil { + result.EnableRedundancyZones = b.EnableRedundancyZones } if b.DisableUpgradeMigration != nil { result.DisableUpgradeMigration = helper.BoolToPtr(*b.DisableUpgradeMigration) } - if b.UpgradeVersionTag != "" { - result.UpgradeVersionTag = b.UpgradeVersionTag + if b.EnableCustomUpgrades != nil { + result.EnableCustomUpgrades = b.EnableCustomUpgrades } return result @@ -90,9 +88,15 @@ func (a *AutopilotConfig) Copy() *AutopilotConfig { if a.CleanupDeadServers != nil { nc.CleanupDeadServers = helper.BoolToPtr(*a.CleanupDeadServers) } + if a.EnableRedundancyZones != nil { + nc.EnableRedundancyZones = helper.BoolToPtr(*a.EnableRedundancyZones) + } if a.DisableUpgradeMigration != nil { nc.DisableUpgradeMigration = helper.BoolToPtr(*a.DisableUpgradeMigration) } + if a.EnableCustomUpgrades != nil { + nc.EnableCustomUpgrades = helper.BoolToPtr(*a.EnableCustomUpgrades) + } return nc } diff --git a/nomad/structs/config/autopilot_test.go b/nomad/structs/config/autopilot_test.go index 1dcb725a0..644541c0a 100644 --- a/nomad/structs/config/autopilot_test.go +++ b/nomad/structs/config/autopilot_test.go @@ -14,9 +14,9 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 1 * time.Second, LastContactThreshold: 1 * time.Second, MaxTrailingLogs: 1, - RedundancyZoneTag: "1", + 
EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &falseValue, - UpgradeVersionTag: "1", + EnableCustomUpgrades: &trueValue, } c2 := &AutopilotConfig{ @@ -24,9 +24,9 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, - RedundancyZoneTag: "2", + EnableRedundancyZones: nil, DisableUpgradeMigration: nil, - UpgradeVersionTag: "2", + EnableCustomUpgrades: nil, } e := &AutopilotConfig{ @@ -34,9 +34,9 @@ func TestAutopilotConfig_Merge(t *testing.T) { ServerStabilizationTime: 2 * time.Second, LastContactThreshold: 2 * time.Second, MaxTrailingLogs: 2, - RedundancyZoneTag: "2", + EnableRedundancyZones: &trueValue, DisableUpgradeMigration: &falseValue, - UpgradeVersionTag: "2", + EnableCustomUpgrades: &trueValue, } result := c1.Merge(c2) diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go index fe83ec86f..43bd6a420 100644 --- a/nomad/structs/operator.go +++ b/nomad/structs/operator.go @@ -1,7 +1,8 @@ package structs import ( - "github.com/hashicorp/consul/agent/consul/autopilot" + "time" + "github.com/hashicorp/raft" ) @@ -69,7 +70,7 @@ type AutopilotSetConfigRequest struct { Datacenter string // Config is the new Autopilot configuration to use. - Config autopilot.Config + Config AutopilotConfig // CAS controls whether to use check-and-set semantics for this request. CAS bool @@ -82,3 +83,39 @@ type AutopilotSetConfigRequest struct { func (op *AutopilotSetConfigRequest) RequestDatacenter() string { return op.Datacenter } + +// AutopilotConfig is the internal config for the Autopilot mechanism. +type AutopilotConfig struct { + // CleanupDeadServers controls whether to remove dead servers when a new + // server is added to the Raft peers. + CleanupDeadServers bool + + // ServerStabilizationTime is the minimum amount of time a server must be + // in a stable, healthy state before it can be added to the cluster. 
Only + // applicable with Raft protocol version 3 or higher. + ServerStabilizationTime time.Duration + + // LastContactThreshold is the limit on the amount of time a server can go + // without leader contact before being considered unhealthy. + LastContactThreshold time.Duration + + // MaxTrailingLogs is the amount of entries in the Raft Log that a server can + // be behind before being considered unhealthy. + MaxTrailingLogs uint64 + + // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. + EnableRedundancyZones bool + + // (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration + // strategy of waiting until enough newer-versioned servers have been added to the + // cluster before promoting them to voters. + DisableUpgradeMigration bool + + // (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom + // upgrade versions when performing migrations. + EnableCustomUpgrades bool + + // CreateIndex/ModifyIndex store the create/modify indexes of this configuration. 
+ CreateIndex uint64 + ModifyIndex uint64 +} diff --git a/nomad/util.go b/nomad/util.go index 8b02a585f..be01dc418 100644 --- a/nomad/util.go +++ b/nomad/util.go @@ -46,7 +46,6 @@ type serverParts struct { MinorVersion int Build version.Version RaftVersion int - NonVoter bool Addr net.Addr RPCAddr net.Addr Status serf.MemberStatus @@ -71,7 +70,6 @@ func isNomadServer(m serf.Member) (bool, *serverParts) { region := m.Tags["region"] datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] - _, nonVoter := m.Tags["nonvoter"] expect := 0 expectStr, ok := m.Tags["expect"] @@ -140,7 +138,6 @@ func isNomadServer(m serf.Member) (bool, *serverParts) { MinorVersion: minorVersion, Build: *buildVersion, RaftVersion: raftVsn, - NonVoter: nonVoter, Status: m.Status, } return true, parts diff --git a/website/source/api/operator.html.md b/website/source/api/operator.html.md index 98226bfe9..edc6115de 100644 --- a/website/source/api/operator.html.md +++ b/website/source/api/operator.html.md @@ -168,9 +168,9 @@ $ curl \ "LastContactThreshold": "200ms", "MaxTrailingLogs": 250, "ServerStabilizationTime": "10s", - "RedundancyZoneTag": "", + "EnableRedundancyZones": false, "DisableUpgradeMigration": false, - "UpgradeVersionTag": "", + "EnableCustomUpgrades": false, "CreateIndex": 4, "ModifyIndex": 4 } @@ -221,19 +221,16 @@ The table below shows this endpoint's support for cluster. Only takes effect if all servers are running Raft protocol version 3 or higher. Must be a duration value such as `30s`. -- `RedundancyZoneTag` `(string: "")` - Controls the node-meta key to use when - Autopilot is separating servers into zones for redundancy. Only one server in - each zone can be a voting member at one time. If left blank, this feature will - be disabled. +- `EnableRedundancyZones` `(bool: false)` - (Enterprise-only) Specifies whether + to enable redundancy zones. 
-- `DisableUpgradeMigration` `(bool: false)` - Disables Autopilot's upgrade - migration strategy in Nomad Enterprise of waiting until enough +- `DisableUpgradeMigration` `(bool: false)` - (Enterprise-only) Disables Autopilot's + upgrade migration strategy in Nomad Enterprise of waiting until enough newer-versioned servers have been added to the cluster before promoting any of them to voters. -- `UpgradeVersionTag` `(string: "")` - Controls the node-meta key to use for - version info when performing upgrade migrations. If left blank, the Nomad - version will be used. +- `EnableCustomUpgrades` `(bool: false)` - (Enterprise-only) Specifies whether to + enable using custom upgrade versions when performing migrations. ### Sample Payload @@ -243,9 +240,9 @@ The table below shows this endpoint's support for "LastContactThreshold": "200ms", "MaxTrailingLogs": 250, "ServerStabilizationTime": "10s", - "RedundancyZoneTag": "", + "EnableRedundancyZones": false, "DisableUpgradeMigration": false, - "UpgradeVersionTag": "", + "EnableCustomUpgrades": false, "CreateIndex": 4, "ModifyIndex": 4 } diff --git a/website/source/docs/agent/configuration/autopilot.html.md b/website/source/docs/agent/configuration/autopilot.html.md index ed13f37aa..5328c0174 100644 --- a/website/source/docs/agent/configuration/autopilot.html.md +++ b/website/source/docs/agent/configuration/autopilot.html.md @@ -18,6 +18,7 @@ description: |- The `autopilot` stanza configures the Nomad agent to configure Autopilot behavior. +For more information about Autopilot, see the [Autopilot Guide](/guides/cluster/autopilot.html). ```hcl autopilot { @@ -25,9 +26,9 @@ autopilot { last_contact_threshold = "200ms" max_trailing_logs = 250 server_stabilization_time = "10s" - redundancy_zone_tag = "" - disable_upgrade_migration = true - upgrade_version_tag = "" + enable_redundancy_zones = false + disable_upgrade_migration = false + enable_custom_upgrades = false } ``` @@ -48,17 +49,17 @@ autopilot { cluster. 
Only takes effect if all servers are running Raft protocol version 3 or higher. Must be a duration value such as `30s`. -- `redundancy_zone_tag` `(string: "")` - Controls the node-meta key to use when - Autopilot is separating servers into zones for redundancy. Only one server in - each zone can be a voting member at one time. If left blank, this feature will - be disabled. +- `enable_redundancy_zones` `(bool: false)` - (Enterprise-only) Controls whether + Autopilot separates servers into zones for redundancy, in conjunction with the + [redundancy_zone](/docs/agent/configuration/server.html#redundancy_zone) parameter. + Only one server in each zone can be a voting member at one time. -- `disable_upgrade_migration` `(bool: false)` - Disables Autopilot's upgrade - migration strategy in Nomad Enterprise of waiting until enough +- `disable_upgrade_migration` `(bool: false)` - (Enterprise-only) Disables Autopilot's + upgrade migration strategy in Nomad Enterprise of waiting until enough newer-versioned servers have been added to the cluster before promoting any of them to voters. -- `upgrade_version_tag` `(string: "")` - Controls the node-meta key to use for - version info when performing upgrade migrations. If left blank, the Nomad - version will be used. +- `enable_custom_upgrades` `(bool: false)` - (Enterprise-only) Specifies whether to + enable using custom upgrade versions when performing migrations, in conjunction with + the [upgrade_version](/docs/agent/configuration/server.html#upgrade_version) parameter. diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 92854ae18..56b5e4abc 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -102,8 +102,9 @@ server { second is a tradeoff as it lowers failure detection time of nodes at the tradeoff of false positives and increased load on the leader. 
-- `non_voting_server` `(bool: false)` - is whether this server will act as - a non-voting member of the cluster to help provide read scalability. (Enterprise-only) +- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether + this server will act as a non-voting member of the cluster to help provide + read scalability. - `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel scheduler threads to run. This can be as many as one per core, or `0` to @@ -120,6 +121,10 @@ server { features and is typically not required as the agent internally knows the latest version, but may be useful in some upgrade scenarios. +- `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy + zone that this server will be a part of for Autopilot management. For more + information, see the [Autopilot Guide](/guides/cluster/autopilot.html). + - `rejoin_after_leave` `(bool: false)` - Specifies if Nomad will ignore a previous leave and attempt to rejoin the cluster when starting. By default, Nomad treats leave as a permanent intent and does not attempt to join the @@ -149,6 +154,10 @@ server { [server address format](#server-address-format) section for more information on the format of the string. +- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use + in place of the Nomad version when custom upgrades are enabled in Autopilot. + For more information, see the [Autopilot Guide](/guides/cluster/autopilot.html). 
+ ### Server Address Format This section describes the acceptable syntax and format for describing the diff --git a/website/source/guides/cluster/autopilot.html.md b/website/source/guides/cluster/autopilot.html.md index 24e390cb0..a3af1d0fa 100644 --- a/website/source/guides/cluster/autopilot.html.md +++ b/website/source/guides/cluster/autopilot.html.md @@ -32,9 +32,9 @@ autopilot { last_contact_threshold = 200ms max_trailing_logs = 250 server_stabilization_time = "10s" - redundancy_zone_tag = "az" + enable_redundancy_zones = false disable_upgrade_migration = false - upgrade_version_tag = "" + enable_custom_upgrades = false } ``` @@ -49,9 +49,9 @@ CleanupDeadServers = true LastContactThreshold = 200ms MaxTrailingLogs = 250 ServerStabilizationTime = 10s -RedundancyZoneTag = "" +EnableRedundancyZones = false DisableUpgradeMigration = false -UpgradeVersionTag = "" +EnableCustomUpgrades = false $ Nomad operator autopilot set-config -cleanup-dead-servers=false Configuration updated! @@ -61,9 +61,9 @@ CleanupDeadServers = false LastContactThreshold = 200ms MaxTrailingLogs = 250 ServerStabilizationTime = 10s -RedundancyZoneTag = "" +EnableRedundancyZones = false DisableUpgradeMigration = false -UpgradeVersionTag = "" +EnableCustomUpgrades = false ``` ## Dead Server Cleanup @@ -164,15 +164,21 @@ isolated failure domains such as AWS Availability Zones; users would be forced t have an overly-large quorum (2-3 nodes per AZ) or give up redundancy within an AZ by deploying just one server in each. -If the `RedundancyZoneTag` setting is set, Nomad will use its value to look for a -zone in each server's specified [`-meta`](/docs/agent/configuration/client.html#meta) -tag. For example, if `RedundancyZoneTag` is set to `zone`, and `-meta zone=east1a` -is used when starting a server, that server's redundancy zone will be `east1a`. 
+If the `EnableRedundancyZones` setting is set to `true`, Nomad will look for a +zone in each server's specified [`redundancy_zone`] +(/docs/agent/configuration/server.html#redundancy_zone) field. Here's an example showing how to configure this: +```hcl +/* config.hcl */ +server { + redundancy_zone = "west-1" +} ``` -$ nomad operator autopilot set-config -redundancy-zone-tag=zone + +``` +$ nomad operator autopilot set-config -enable-redundancy-zones=true Configuration updated! ``` @@ -193,11 +199,11 @@ to voters and demoting the old servers. After this is finished, the old servers safely removed from the cluster. To check the Nomad version of the servers, either the [autopilot health] -(/api/operator.html#read-health) endpoint or the `Nomad members` +(/api/operator.html#read-health) endpoint or the `nomad members` command can be used: ``` -$ Nomad members +$ nomad members Node Address Status Type Build Protocol DC node1 127.0.0.1:8301 alive server 0.7.5 2 dc1 node2 127.0.0.1:8703 alive server 0.7.5 2 dc1 @@ -207,13 +213,11 @@ node4 127.0.0.1:8203 alive server 0.8.0 2 dc1 ### Migrations Without a Nomad Version Change -The `UpgradeVersionTag` can be used to override the version information used during +The `EnableCustomUpgrades` field can be used to override the version information used during a migration, so that the migration logic can be used for updating the cluster when changing configuration. -If the `UpgradeVersionTag` setting is set, Nomad will use its value to look for a -version in each server's specified [`-meta`](/docs/agent/configuration/client.html#meta) -tag. For example, if `UpgradeVersionTag` is set to `build`, and `-meta build:0.0.2` -is used when starting a server, that server's version will be `0.0.2` when considered in -a migration.
The upgrade logic will follow semantic versioning and the version string +If the `EnableCustomUpgrades` setting is set to `true`, Nomad will look for a +version in each server's specified [`upgrade_version`](/docs/agent/configuration/server.html#upgrade_version) +field. The upgrade logic will follow semantic versioning and the `upgrade_version` must be in the form of either `X`, `X.Y`, or `X.Y.Z`. From 0eb0acacdcedecf29b4d8d9ed4b905c2645a4193 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Tue, 30 Jan 2018 15:14:03 -0800 Subject: [PATCH 2/3] Fix remaining issues with autopilot change --- command/agent/config-test-fixtures/basic.hcl | 6 +++--- command/agent/http.go | 19 ------------------- command/operator_autopilot_set.go | 6 +++--- nomad/autopilot.go | 7 ++++++- nomad/util_test.go | 4 ---- .../source/guides/cluster/autopilot.html.md | 6 +++--- 6 files changed, 15 insertions(+), 33 deletions(-) diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 4ddc65771..d7731efc6 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -83,9 +83,9 @@ server { retry_interval = "15s" rejoin_after_leave = true non_voting_server = true - redundancy_zone = "foo" - upgrade_version = "0.8.0" - encrypt = "abc" + redundancy_zone = "foo" + upgrade_version = "0.8.0" + encrypt = "abc" } acl { enabled = true diff --git a/command/agent/http.go b/command/agent/http.go index dddb4baf8..dc147b142 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -18,7 +18,6 @@ import ( assetfs "github.com/elazarl/go-bindata-assetfs" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/nomad/structs" - "github.com/mitchellh/mapstructure" "github.com/rs/cors" "github.com/ugorji/go/codec" ) @@ -346,24 +345,6 @@ func decodeBody(req *http.Request, out interface{}) error { return dec.Decode(&out) } -// decodeBodyFunc is used to decode a JSON request
body invoking -// a given callback function -func decodeBodyFunc(req *http.Request, out interface{}, cb func(interface{}) error) error { - var raw interface{} - dec := json.NewDecoder(req.Body) - if err := dec.Decode(&raw); err != nil { - return err - } - - // Invoke the callback prior to decode - if cb != nil { - if err := cb(raw); err != nil { - return err - } - } - return mapstructure.Decode(raw, out) -} - // setIndex is used to set the index response header func setIndex(resp http.ResponseWriter, index uint64) { resp.Header().Set("X-Nomad-Index", strconv.FormatUint(index, 10)) diff --git a/command/operator_autopilot_set.go b/command/operator_autopilot_set.go index 280a0413e..3e8873279 100644 --- a/command/operator_autopilot_set.go +++ b/command/operator_autopilot_set.go @@ -19,9 +19,9 @@ func (c *OperatorAutopilotSetCommand) AutocompleteFlags() complete.Flags { "-max-trailing-logs": complete.PredictAnything, "-last-contact-threshold": complete.PredictAnything, "-server-stabilization-time": complete.PredictAnything, - "-enable-redundancy-zones": complete.PredictAnything, - "-disable-upgrade-migration": complete.PredictAnything, - "-enable-custom-upgrades": complete.PredictAnything, + "-enable-redundancy-zones": complete.PredictNothing, + "-disable-upgrade-migration": complete.PredictNothing, + "-enable-custom-upgrades": complete.PredictNothing, }) } diff --git a/nomad/autopilot.go b/nomad/autopilot.go index 4c5ea180b..399b3458f 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -11,7 +11,12 @@ import ( ) const ( - AutopilotRZTag = "ap_zone" + // AutopilotRZTag is the Serf tag to use for the redundancy zone value + // when passing the server metadata to Autopilot. + AutopilotRZTag = "ap_zone" + + // AutopilotVersionTag is the Serf tag to use for the custom version value + // when passing the server metadata to Autopilot.
AutopilotVersionTag = "ap_version" ) diff --git a/nomad/util_test.go b/nomad/util_test.go index 12db5100c..3e1db6296 100644 --- a/nomad/util_test.go +++ b/nomad/util_test.go @@ -24,7 +24,6 @@ func TestIsNomadServer(t *testing.T) { "port": "10000", "vsn": "1", "raft_vsn": "2", - "nonvoter": "1", "build": "0.7.0+ent", }, } @@ -51,9 +50,6 @@ func TestIsNomadServer(t *testing.T) { if parts.RPCAddr.String() != "1.1.1.1:10000" { t.Fatalf("bad: %v", parts.RPCAddr.String()) } - if !parts.NonVoter { - t.Fatalf("bad: %v", parts.NonVoter) - } if seg := parts.Build.Segments(); len(seg) != 3 { t.Fatalf("bad: %v", parts.Build) } else if seg[0] != 0 && seg[1] != 7 && seg[2] != 0 { diff --git a/website/source/guides/cluster/autopilot.html.md b/website/source/guides/cluster/autopilot.html.md index a3af1d0fa..6e7d53422 100644 --- a/website/source/guides/cluster/autopilot.html.md +++ b/website/source/guides/cluster/autopilot.html.md @@ -53,10 +53,10 @@ EnableRedundancyZones = false DisableUpgradeMigration = false EnableCustomUpgrades = false -$ Nomad operator autopilot set-config -cleanup-dead-servers=false +$ nomad operator autopilot set-config -cleanup-dead-servers=false Configuration updated! 
-$ Nomad operator autopilot get-config +$ nomad operator autopilot get-config CleanupDeadServers = false LastContactThreshold = 200ms MaxTrailingLogs = 250 @@ -203,7 +203,7 @@ To check the Nomad version of the servers, either the [autopilot health] command can be used: ``` -$ nomad members +$ nomad server-members Node Address Status Type Build Protocol DC node1 127.0.0.1:8301 alive server 0.7.5 2 dc1 node2 127.0.0.1:8703 alive server 0.7.5 2 dc1 From 5cec4e794e1212c09fd6267f17bacd88d5801f64 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Tue, 30 Jan 2018 16:10:38 -0800 Subject: [PATCH 3/3] Fix autopilot guide example --- website/source/guides/cluster/autopilot.html.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/source/guides/cluster/autopilot.html.md b/website/source/guides/cluster/autopilot.html.md index 6e7d53422..15f650732 100644 --- a/website/source/guides/cluster/autopilot.html.md +++ b/website/source/guides/cluster/autopilot.html.md @@ -204,11 +204,11 @@ command can be used: ``` $ nomad server-members -Node Address Status Type Build Protocol DC -node1 127.0.0.1:8301 alive server 0.7.5 2 dc1 -node2 127.0.0.1:8703 alive server 0.7.5 2 dc1 -node3 127.0.0.1:8803 alive server 0.7.5 2 dc1 -node4 127.0.0.1:8203 alive server 0.8.0 2 dc1 +Name Address Port Status Leader Protocol Build Datacenter Region +node1 127.0.0.1 4648 alive true 3 0.7.1 dc1 global +node2 127.0.0.1 4748 alive false 3 0.7.1 dc1 global +node3 127.0.0.1 4848 alive false 3 0.7.1 dc1 global +node4 127.0.0.1 4948 alive false 3 0.8.0 dc1 global ``` ### Migrations Without a Nomad Version Change