// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package api

import (
	"bytes"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"
)

// AutopilotConfiguration is used for querying/setting the Autopilot configuration.
// Autopilot helps manage operator tasks related to Consul servers like removing
// failed servers from the Raft quorum.
type AutopilotConfiguration struct {
	// CleanupDeadServers controls whether to remove dead servers from the Raft
	// peer list when a new server joins.
	CleanupDeadServers bool

	// LastContactThreshold is the limit on the amount of time a server can go
	// without leader contact before being considered unhealthy.
	LastContactThreshold *ReadableDuration

	// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
	// be behind before being considered unhealthy.
	MaxTrailingLogs uint64

	// MinQuorum sets the minimum number of servers allowed in a cluster before
	// autopilot can prune dead servers.
	MinQuorum uint

	// ServerStabilizationTime is the minimum amount of time a server must be
	// in a stable, healthy state before it can be added to the cluster. Only
	// applicable with Raft protocol version 3 or higher.
	ServerStabilizationTime *ReadableDuration

	// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
	// servers into zones for redundancy. If left blank, this feature will be disabled.
	RedundancyZoneTag string

	// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
	// strategy of waiting until enough newer-versioned servers have been added to the
	// cluster before promoting them to voters.
	DisableUpgradeMigration bool

	// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
	// performing upgrade migrations. If left blank, the Consul version will be used.
	UpgradeVersionTag string

	// CreateIndex holds the index corresponding to the creation of this configuration.
	// This is a read-only field.
	CreateIndex uint64

	// ModifyIndex will be set to the index of the last update when retrieving the
	// Autopilot configuration. Resubmitting a configuration with
	// AutopilotCASConfiguration will perform a check-and-set operation which ensures
	// there hasn't been a subsequent update since the configuration was retrieved.
	ModifyIndex uint64
}

// NewAutopilotConfiguration returns an AutopilotConfiguration populated with
// default values, consistent with
// https://www.consul.io/api-docs/operator/autopilot#parameters-1
//
// CreateIndex and ModifyIndex are not set here and keep their zero values.
func NewAutopilotConfiguration() AutopilotConfiguration {
	cfg := AutopilotConfiguration{
		CleanupDeadServers:      true,
		LastContactThreshold:    NewReadableDuration(200 * time.Millisecond),
		MaxTrailingLogs:         250,
		MinQuorum:               0,
		ServerStabilizationTime: NewReadableDuration(10 * time.Second),
		RedundancyZoneTag:       "",
		DisableUpgradeMigration: false,
		UpgradeVersionTag:       "",
	}

	return cfg
}

// ServerHealth is the health (from the leader's point of view) of a server.
type ServerHealth struct {
	// ID is the raft ID of the server.
	ID string

	// Name is the node name of the server.
	Name string

	// Address is the address of the server.
	Address string

	// The status of the SerfHealth check for the server.
	SerfStatus string

	// Version is the Consul version of the server.
	Version string

	// Leader is whether this server is currently the leader.
	Leader bool

	// LastContact is the time since this node's last contact with the leader.
	LastContact *ReadableDuration

	// LastTerm is the highest leader term this server has a record of in its Raft log.
	LastTerm uint64

	// LastIndex is the last log index this server has a record of in its Raft log.
	LastIndex uint64

	// Healthy is whether or not the server is healthy according to the current
	// Autopilot config.
	Healthy bool

	// Voter is whether this is a voting server.
	Voter bool

	// StableSince is the last time this server's Healthy value changed.
	StableSince time.Time
}

// OperatorHealthReply is a representation of the overall health of the cluster.
type OperatorHealthReply struct {
	// Healthy is true if all the servers in the cluster are healthy.
	Healthy bool

	// FailureTolerance is the number of healthy servers that could be lost without
	// an outage occurring.
	FailureTolerance int

	// Servers holds the health of each server.
	Servers []ServerHealth
}

// AutopilotState is the type that Operator.AutopilotState decodes the
// /v1/operator/autopilot/state response into.
//
// ReadReplicas, RedundancyZones and Upgrade are omitted from JSON output when
// empty.
type AutopilotState struct {
	Healthy                    bool
	FailureTolerance           int
	OptimisticFailureTolerance int
	Servers                    map[string]AutopilotServer
	Leader                     string
	Voters                     []string
	ReadReplicas               []string                 `json:",omitempty"`
	RedundancyZones            map[string]AutopilotZone `json:",omitempty"`
	Upgrade                    *AutopilotUpgrade        `json:",omitempty"`
}

// AutopilotServer is the value type of AutopilotState.Servers and describes a
// single server.
//
// RedundancyZone and UpgradeVersion are omitted from JSON output when empty.
type AutopilotServer struct {
	ID             string
	Name           string
	Address        string
	NodeStatus     string
	Version        string
	LastContact    *ReadableDuration
	LastTerm       uint64
	LastIndex      uint64
	Healthy        bool
	StableSince    time.Time
	RedundancyZone string `json:",omitempty"`
	UpgradeVersion string `json:",omitempty"`
	ReadReplica    bool
	Status         AutopilotServerStatus
	Meta           map[string]string
	NodeType       AutopilotServerType
}

// AutopilotServerStatus is the string type of AutopilotServer.Status.
type AutopilotServerStatus string

// Known AutopilotServerStatus values.
const (
	AutopilotServerNone     AutopilotServerStatus = "none"
	AutopilotServerLeader   AutopilotServerStatus = "leader"
	AutopilotServerVoter    AutopilotServerStatus = "voter"
	AutopilotServerNonVoter AutopilotServerStatus = "non-voter"
	AutopilotServerStaging  AutopilotServerStatus = "staging"
)

// AutopilotServerType is the string type of AutopilotServer.NodeType.
type AutopilotServerType string

// Known AutopilotServerType values.
const (
	AutopilotTypeVoter          AutopilotServerType = "voter"
	AutopilotTypeReadReplica    AutopilotServerType = "read-replica"
	AutopilotTypeZoneVoter      AutopilotServerType = "zone-voter"
	AutopilotTypeZoneExtraVoter AutopilotServerType = "zone-extra-voter"
	AutopilotTypeZoneStandby    AutopilotServerType = "zone-standby"
)

// AutopilotZone is the value type of AutopilotState.RedundancyZones.
type AutopilotZone struct {
	Servers          []string
	Voters           []string
	FailureTolerance int
}

// AutopilotZoneUpgradeVersions is the value type of
// AutopilotUpgrade.RedundancyZones. Every field is omitted from JSON output
// when empty.
type AutopilotZoneUpgradeVersions struct {
	TargetVersionVoters    []string `json:",omitempty"`
	TargetVersionNonVoters []string `json:",omitempty"`
	OtherVersionVoters     []string `json:",omitempty"`
	OtherVersionNonVoters  []string `json:",omitempty"`
}

// AutopilotUpgrade is the type of AutopilotState.Upgrade. Every field except
// Status is omitted from JSON output when empty.
type AutopilotUpgrade struct {
	Status                    AutopilotUpgradeStatus
	TargetVersion             string                                  `json:",omitempty"`
	TargetVersionVoters       []string                                `json:",omitempty"`
	TargetVersionNonVoters    []string                                `json:",omitempty"`
	TargetVersionReadReplicas []string                                `json:",omitempty"`
	OtherVersionVoters        []string                                `json:",omitempty"`
	OtherVersionNonVoters     []string                                `json:",omitempty"`
	OtherVersionReadReplicas  []string                                `json:",omitempty"`
	RedundancyZones           map[string]AutopilotZoneUpgradeVersions `json:",omitempty"`
}

// AutopilotUpgradeStatus is the string type of AutopilotUpgrade.Status.
type AutopilotUpgradeStatus string

const (
	// AutopilotUpgradeIdle is the status when no upgrade is in progress.
	AutopilotUpgradeIdle AutopilotUpgradeStatus = "idle"

	// AutopilotUpgradeAwaitNewVoters is the status when more servers of
	// the target version must be added in order to start the promotion
	// phase of the upgrade.
	AutopilotUpgradeAwaitNewVoters AutopilotUpgradeStatus = "await-new-voters"

	// AutopilotUpgradePromoting is the status when autopilot is promoting
	// servers of the target version.
	AutopilotUpgradePromoting AutopilotUpgradeStatus = "promoting"

	// AutopilotUpgradeDemoting is the status when autopilot is demoting
	// servers not on the target version.
	AutopilotUpgradeDemoting AutopilotUpgradeStatus = "demoting"

	// AutopilotUpgradeLeaderTransfer is the status when autopilot is transferring
	// leadership from a server running an older version to a server
	// using the target version.
	AutopilotUpgradeLeaderTransfer AutopilotUpgradeStatus = "leader-transfer"

	// AutopilotUpgradeAwaitNewServers is the status when autopilot has finished
	// transferring leadership and has demoted all the other versioned
	// servers but wants to indicate that more target version servers
	// are needed to replace all the existing other version servers.
	AutopilotUpgradeAwaitNewServers AutopilotUpgradeStatus = "await-new-servers"

	// AutopilotUpgradeAwaitServerRemoval is the status when autopilot is waiting
	// for the servers on non-target versions to be removed.
	AutopilotUpgradeAwaitServerRemoval AutopilotUpgradeStatus = "await-server-removal"

	// AutopilotUpgradeDisabled is the status when automated upgrades are
	// disabled in the autopilot configuration.
	AutopilotUpgradeDisabled AutopilotUpgradeStatus = "disabled"
)

// ReadableDuration is a duration type that is serialized to JSON in human readable format.
type ReadableDuration time.Duration

// NewReadableDuration returns a pointer to a new ReadableDuration holding dur.
func NewReadableDuration(dur time.Duration) *ReadableDuration {
	d := ReadableDuration(dur)
	return &d
}

// String returns the time.Duration string form of d. A nil receiver is
// rendered as the zero duration.
func (d *ReadableDuration) String() string {
	return d.Duration().String()
}

// Duration converts d to a time.Duration. A nil receiver yields 0.
func (d *ReadableDuration) Duration() time.Duration {
	if d == nil {
		return time.Duration(0)
	}
	return time.Duration(*d)
}

// MarshalJSON encodes d as a double-quoted time.Duration string (for example
// "10s"). A nil receiver is encoded as the zero duration. The returned error
// is always nil.
func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}

// UnmarshalJSON decodes raw into d. Two input forms are accepted:
//   - a double-quoted string, whose contents are parsed with time.ParseDuration;
//   - anything else, which is parsed with strconv.ParseFloat and converted
//     directly to a time.Duration (i.e. interpreted as nanoseconds).
//
// It returns an error if d is nil or if parsing fails; d is only written on
// success.
func (d *ReadableDuration) UnmarshalJSON(raw []byte) (err error) {
	if d == nil {
		return fmt.Errorf("cannot unmarshal to nil pointer")
	}

	var dur time.Duration
	str := string(raw)
	if len(str) >= 2 && str[0] == '"' && str[len(str)-1] == '"' {
		// quoted string
		dur, err = time.ParseDuration(str[1 : len(str)-1])
		if err != nil {
			return err
		}
	} else {
		// no quotes, not a string
		v, err := strconv.ParseFloat(str, 64)
		if err != nil {
			return err
		}
		dur = time.Duration(v)
	}

	*d = ReadableDuration(dur)
	return nil
}

// AutopilotGetConfiguration is used to query the current Autopilot configuration.
func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) { r := op.c.newRequest("GET", "/v1/operator/autopilot/configuration") r.setQueryOptions(q) _, resp, err := op.c.doRequest(r) if err != nil { return nil, err } defer closeResponseBody(resp) if err := requireOK(resp); err != nil { return nil, err } var out AutopilotConfiguration if err := decodeBody(resp, &out); err != nil { return nil, err } return &out, nil } // AutopilotSetConfiguration is used to set the current Autopilot configuration. func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) error { r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration") r.setWriteOptions(q) r.obj = conf _, resp, err := op.c.doRequest(r) if err != nil { return err } defer closeResponseBody(resp) if err := requireOK(resp); err != nil { return err } return nil } // AutopilotCASConfiguration is used to perform a Check-And-Set update on the // Autopilot configuration. The ModifyIndex value will be respected. Returns // true on success or false on failures. 
func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, error) { r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration") r.setWriteOptions(q) r.params.Set("cas", strconv.FormatUint(conf.ModifyIndex, 10)) r.obj = conf _, resp, err := op.c.doRequest(r) if err != nil { return false, err } defer closeResponseBody(resp) if err := requireOK(resp); err != nil { return false, err } var buf bytes.Buffer if _, err := io.Copy(&buf, resp.Body); err != nil { return false, fmt.Errorf("Failed to read response: %v", err) } res := strings.Contains(buf.String(), "true") return res, nil } // AutopilotServerHealth func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, error) { r := op.c.newRequest("GET", "/v1/operator/autopilot/health") r.setQueryOptions(q) // we use 429 status to indicate unhealthiness _, resp, err := op.c.doRequest(r) if err != nil { return nil, err } defer closeResponseBody(resp) err = requireHttpCodes(resp, 200, 429) if err != nil { return nil, err } defer closeResponseBody(resp) var out OperatorHealthReply if err := decodeBody(resp, &out); err != nil { return nil, err } return &out, nil } func (op *Operator) AutopilotState(q *QueryOptions) (*AutopilotState, error) { r := op.c.newRequest("GET", "/v1/operator/autopilot/state") r.setQueryOptions(q) _, resp, err := op.c.doRequest(r) if err != nil { return nil, err } defer closeResponseBody(resp) if err := requireOK(resp); err != nil { return nil, err } var out AutopilotState if err := decodeBody(resp, &out); err != nil { return nil, err } return &out, nil }