open-consul/agent/consul/operator_autopilot_endpoint.go
Daniel Nephin 8654adfc53 Handle FSM.Apply errors in raftApply
Previously we were inconsistently checking the response for errors. This
PR moves the response-is-error check into raftApply, so that all callers
can look at only the error response, instead of having to know that
errors could come from two places.

This should expose a few more errors that were previously hidden because
in some calls to raftApply we were ignoring the response return value.

Also handle errors more consistently. In some cases we would log the
error before returning it. This can be very confusing because it can
result in the same error being logged multiple times. Instead return
a wrapped error.
2021-04-20 13:29:29 -04:00

173 lines
5 KiB
Go

package consul
import (
"fmt"
autopilot "github.com/hashicorp/raft-autopilot"
"github.com/hashicorp/serf/serf"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
)
// AutopilotGetConfiguration is used to retrieve the current Autopilot configuration.
func (op *Operator) AutopilotGetConfiguration(args *structs.DCSpecificRequest, reply *structs.AutopilotConfig) error {
if done, err := op.srv.ForwardRPC("Operator.AutopilotGetConfiguration", args, args, reply); done {
return err
}
// This action requires operator read access.
identity, rule, err := op.srv.ResolveTokenToIdentityAndAuthorizer(args.Token)
if err != nil {
return err
}
if err := op.srv.validateEnterpriseToken(identity); err != nil {
return err
}
if rule != nil && rule.OperatorRead(nil) != acl.Allow {
return acl.PermissionDenied("Missing operator:read permissions")
}
state := op.srv.fsm.State()
_, config, err := state.AutopilotConfig()
if err != nil {
return err
}
if config == nil {
return fmt.Errorf("autopilot config not initialized yet")
}
*reply = *config
return nil
}
// AutopilotSetConfiguration is used to set the current Autopilot configuration.
func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRequest, reply *bool) error {
if done, err := op.srv.ForwardRPC("Operator.AutopilotSetConfiguration", args, args, reply); done {
return err
}
// This action requires operator write access.
identity, rule, err := op.srv.ResolveTokenToIdentityAndAuthorizer(args.Token)
if err != nil {
return err
}
if err := op.srv.validateEnterpriseToken(identity); err != nil {
return err
}
if rule != nil && rule.OperatorWrite(nil) != acl.Allow {
return acl.PermissionDenied("Missing operator:write permissions")
}
// Apply the update
resp, err := op.srv.raftApply(structs.AutopilotRequestType, args)
if err != nil {
return fmt.Errorf("raft apply failed: %w", err)
}
// Check if the return type is a bool.
if respBool, ok := resp.(bool); ok {
*reply = respBool
}
return nil
}
// ServerHealth is used to get the current health of the servers.
func (op *Operator) ServerHealth(args *structs.DCSpecificRequest, reply *structs.AutopilotHealthReply) error {
// This must be sent to the leader, so we fix the args since we are
// re-using a structure where we don't support all the options.
args.RequireConsistent = true
args.AllowStale = false
if done, err := op.srv.ForwardRPC("Operator.ServerHealth", args, args, reply); done {
return err
}
// This action requires operator read access.
identity, rule, err := op.srv.ResolveTokenToIdentityAndAuthorizer(args.Token)
if err != nil {
return err
}
if err := op.srv.validateEnterpriseToken(identity); err != nil {
return err
}
if rule != nil && rule.OperatorRead(nil) != acl.Allow {
return acl.PermissionDenied("Missing operator:read permissions")
}
state := op.srv.autopilot.GetState()
if state == nil {
// this behavior seems odd but its functionally equivalent to 1.8.5 where if
// autopilot didn't have a health reply yet it would just return no error
return nil
}
health := structs.AutopilotHealthReply{
Healthy: state.Healthy,
FailureTolerance: state.FailureTolerance,
}
for _, srv := range state.Servers {
srvHealth := structs.AutopilotServerHealth{
ID: string(srv.Server.ID),
Name: srv.Server.Name,
Address: string(srv.Server.Address),
Version: srv.Server.Version,
Leader: srv.State == autopilot.RaftLeader,
Voter: srv.State == autopilot.RaftLeader || srv.State == autopilot.RaftVoter,
LastContact: srv.Stats.LastContact,
LastTerm: srv.Stats.LastTerm,
LastIndex: srv.Stats.LastIndex,
Healthy: srv.Health.Healthy,
StableSince: srv.Health.StableSince,
}
switch srv.Server.NodeStatus {
case autopilot.NodeAlive:
srvHealth.SerfStatus = serf.StatusAlive
case autopilot.NodeLeft:
srvHealth.SerfStatus = serf.StatusLeft
case autopilot.NodeFailed:
srvHealth.SerfStatus = serf.StatusFailed
default:
srvHealth.SerfStatus = serf.StatusNone
}
health.Servers = append(health.Servers, srvHealth)
}
*reply = health
return nil
}
func (op *Operator) AutopilotState(args *structs.DCSpecificRequest, reply *autopilot.State) error {
// This must be sent to the leader, so we fix the args since we are
// re-using a structure where we don't support all the options.
args.RequireConsistent = true
args.AllowStale = false
if done, err := op.srv.ForwardRPC("Operator.AutopilotState", args, args, reply); done {
return err
}
// This action requires operator read access.
identity, rule, err := op.srv.ResolveTokenToIdentityAndAuthorizer(args.Token)
if err != nil {
return err
}
if err := op.srv.validateEnterpriseToken(identity); err != nil {
return err
}
if rule != nil && rule.OperatorRead(nil) != acl.Allow {
return acl.PermissionDenied("Missing operator:read permissions")
}
state := op.srv.autopilot.GetState()
if state == nil {
return fmt.Errorf("Failed to get autopilot state: no state found")
}
*reply = *state
return nil
}