f3c80c4eef
Protobuf Refactoring for Multi-Module Cleanliness This commit includes the following: Moves all packages that were within proto/ to proto/private Rewrites imports to account for the packages being moved Adds in buf.work.yaml to enable buf workspaces Names the proto-public buf module so that we can override the Go package imports within proto/buf.yaml Bumps the buf version dependency to 1.14.0 (I was trying out the version to see if it would get around an issue - it didn't but it also doesn't break things and it seemed best to keep up with the toolchain changes) Why: In the future we will need to consume other protobuf dependencies such as the Google HTTP annotations for openapi generation or grpc-gateway usage. There were some recent changes to have our own ratelimiting annotations. The two combined were not working when I was trying to use them together (attempting to rebase another branch) Buf workspaces should be the solution to the problem Buf workspaces means that each module will have generated Go code that embeds proto file names relative to the proto dir and not the top level repo root. This resulted in proto file name conflicts in the Go global protobuf type registry. The solution to that was to add in a private/ directory into the path within the proto/ directory. That then required rewriting all the imports. Is this safe? AFAICT yes The gRPC wire protocol doesn't seem to care about the proto file names (although the Go grpc code does tack on the proto file name as Metadata in the ServiceDesc) Other than imports, there were no changes to any generated code as a result of this.
454 lines
14 KiB
Go
454 lines
14 KiB
Go
package agent
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
external "github.com/hashicorp/consul/agent/grpc-external"
|
|
"github.com/hashicorp/consul/proto/private/pboperator"
|
|
|
|
multierror "github.com/hashicorp/go-multierror"
|
|
"github.com/hashicorp/raft"
|
|
autopilot "github.com/hashicorp/raft-autopilot"
|
|
|
|
"github.com/hashicorp/consul/acl"
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
"github.com/hashicorp/consul/api"
|
|
)
|
|
|
|
// OperatorRaftConfiguration is used to inspect the current Raft configuration.
|
|
// This supports the stale query mode in case the cluster doesn't have a leader.
|
|
func (s *HTTPHandlers) OperatorRaftConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
var args structs.DCSpecificRequest
|
|
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
|
|
return nil, nil
|
|
}
|
|
|
|
var reply structs.RaftConfigurationResponse
|
|
if err := s.agent.RPC(req.Context(), "Operator.RaftGetConfiguration", &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return reply, nil
|
|
}
|
|
|
|
// OperatorRaftTransferLeader is used to transfer raft cluster leadership to another node
|
|
func (s *HTTPHandlers) OperatorRaftTransferLeader(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
|
|
var entMeta acl.EnterpriseMeta
|
|
if err := s.parseEntMetaPartition(req, &entMeta); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
params := req.URL.Query()
|
|
_, hasID := params["id"]
|
|
ID := ""
|
|
if hasID {
|
|
ID = params.Get("id")
|
|
}
|
|
args := pboperator.TransferLeaderRequest{
|
|
ID: ID,
|
|
}
|
|
|
|
var token string
|
|
s.parseToken(req, &token)
|
|
ctx, err := external.ContextWithQueryOptions(req.Context(), structs.QueryOptions{Token: token})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
result, err := s.agent.rpcClientOperator.TransferLeader(ctx, &args)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if result.Success != true {
|
|
return nil, HTTPError{StatusCode: http.StatusNotFound, Reason: fmt.Sprintf("Failed to transfer Leader: %s", err.Error())}
|
|
}
|
|
reply := new(api.TransferLeaderResponse)
|
|
pboperator.TransferLeaderResponseToAPI(result, reply)
|
|
return reply, nil
|
|
}
|
|
|
|
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
|
|
// removing peers by address.
|
|
func (s *HTTPHandlers) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
var args structs.RaftRemovePeerRequest
|
|
s.parseDC(req, &args.Datacenter)
|
|
s.parseToken(req, &args.Token)
|
|
|
|
params := req.URL.Query()
|
|
_, hasID := params["id"]
|
|
if hasID {
|
|
args.ID = raft.ServerID(params.Get("id"))
|
|
}
|
|
_, hasAddress := params["address"]
|
|
if hasAddress {
|
|
args.Address = raft.ServerAddress(params.Get("address"))
|
|
}
|
|
|
|
if !hasID && !hasAddress {
|
|
return nil, HTTPError{
|
|
StatusCode: http.StatusBadRequest,
|
|
Reason: "Must specify either ?id with the server's ID or ?address with IP:port of peer to remove",
|
|
}
|
|
}
|
|
if hasID && hasAddress {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: "Must specify only one of ?id or ?address"}
|
|
}
|
|
|
|
var reply struct{}
|
|
method := "Operator.RaftRemovePeerByID"
|
|
if hasAddress {
|
|
method = "Operator.RaftRemovePeerByAddress"
|
|
}
|
|
if err := s.agent.RPC(req.Context(), method, &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
type keyringArgs struct {
|
|
Key string
|
|
Token string
|
|
RelayFactor uint8
|
|
LocalOnly bool // ?local-only; only used for GET requests
|
|
}
|
|
|
|
// OperatorKeyringEndpoint handles keyring operations (install, list, use, remove)
|
|
func (s *HTTPHandlers) OperatorKeyringEndpoint(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
var args keyringArgs
|
|
if req.Method == "POST" || req.Method == "PUT" || req.Method == "DELETE" {
|
|
if err := decodeBody(req.Body, &args); err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Request decode failed: %v", err)}
|
|
}
|
|
}
|
|
s.parseToken(req, &args.Token)
|
|
|
|
// Parse relay factor
|
|
if relayFactor := req.URL.Query().Get("relay-factor"); relayFactor != "" {
|
|
n, err := strconv.Atoi(relayFactor)
|
|
if err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing relay factor: %v", err)}
|
|
}
|
|
|
|
args.RelayFactor, err = ParseRelayFactor(n)
|
|
if err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Invalid relay-factor: %v", err)}
|
|
}
|
|
}
|
|
|
|
// Parse local-only. local-only can only be used in GET requests.
|
|
if localOnly := req.URL.Query().Get("local-only"); localOnly != "" {
|
|
var err error
|
|
args.LocalOnly, err = strconv.ParseBool(localOnly)
|
|
if err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing local-only: %v", err)}
|
|
}
|
|
|
|
err = ValidateLocalOnly(args.LocalOnly, req.Method == "GET")
|
|
if err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Invalid use of local-only: %v", err)}
|
|
}
|
|
}
|
|
|
|
// Switch on the method
|
|
switch req.Method {
|
|
case "GET":
|
|
return s.KeyringList(resp, req, &args)
|
|
case "POST":
|
|
return s.KeyringInstall(resp, req, &args)
|
|
case "PUT":
|
|
return s.KeyringUse(resp, req, &args)
|
|
case "DELETE":
|
|
return s.KeyringRemove(resp, req, &args)
|
|
default:
|
|
return nil, MethodNotAllowedError{req.Method, []string{"GET", "POST", "PUT", "DELETE"}}
|
|
}
|
|
}
|
|
|
|
// KeyringInstall is used to install a new gossip encryption key into the cluster
|
|
func (s *HTTPHandlers) KeyringInstall(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
|
|
responses, err := s.agent.InstallKey(args.Key, args.Token, args.RelayFactor)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nil, keyringErrorsOrNil(responses.Responses)
|
|
}
|
|
|
|
// KeyringList is used to list the keys installed in the cluster
|
|
func (s *HTTPHandlers) KeyringList(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
|
|
responses, err := s.agent.ListKeys(args.Token, args.LocalOnly, args.RelayFactor)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return responses.Responses, keyringErrorsOrNil(responses.Responses)
|
|
}
|
|
|
|
// KeyringRemove is used to list the keys installed in the cluster
|
|
func (s *HTTPHandlers) KeyringRemove(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
|
|
responses, err := s.agent.RemoveKey(args.Key, args.Token, args.RelayFactor)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nil, keyringErrorsOrNil(responses.Responses)
|
|
}
|
|
|
|
// KeyringUse is used to change the primary gossip encryption key
|
|
func (s *HTTPHandlers) KeyringUse(resp http.ResponseWriter, req *http.Request, args *keyringArgs) (interface{}, error) {
|
|
responses, err := s.agent.UseKey(args.Key, args.Token, args.RelayFactor)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nil, keyringErrorsOrNil(responses.Responses)
|
|
}
|
|
|
|
func keyringErrorsOrNil(responses []*structs.KeyringResponse) error {
|
|
var errs error
|
|
for _, response := range responses {
|
|
if response.Error != "" {
|
|
pool := response.Datacenter + " (LAN)"
|
|
if response.WAN {
|
|
pool = "WAN"
|
|
}
|
|
if response.Segment != "" {
|
|
pool += " [segment: " + response.Segment + "]"
|
|
} else if !acl.IsDefaultPartition(response.Partition) {
|
|
pool += " [partition: " + response.Partition + "]"
|
|
}
|
|
errs = multierror.Append(errs, fmt.Errorf("%s error: %s", pool, response.Error))
|
|
for key, message := range response.Messages {
|
|
errs = multierror.Append(errs, fmt.Errorf("%s: %s", key, message))
|
|
}
|
|
}
|
|
}
|
|
return errs
|
|
}
|
|
|
|
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
|
|
// This supports the stale query mode in case the cluster doesn't have a leader.
|
|
func (s *HTTPHandlers) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
// Switch on the method
|
|
switch req.Method {
|
|
case "GET":
|
|
var args structs.DCSpecificRequest
|
|
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
|
|
return nil, nil
|
|
}
|
|
|
|
var reply structs.AutopilotConfig
|
|
if err := s.agent.RPC(req.Context(), "Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out := api.AutopilotConfiguration{
|
|
CleanupDeadServers: reply.CleanupDeadServers,
|
|
LastContactThreshold: api.NewReadableDuration(reply.LastContactThreshold),
|
|
MaxTrailingLogs: reply.MaxTrailingLogs,
|
|
MinQuorum: reply.MinQuorum,
|
|
ServerStabilizationTime: api.NewReadableDuration(reply.ServerStabilizationTime),
|
|
RedundancyZoneTag: reply.RedundancyZoneTag,
|
|
DisableUpgradeMigration: reply.DisableUpgradeMigration,
|
|
UpgradeVersionTag: reply.UpgradeVersionTag,
|
|
CreateIndex: reply.CreateIndex,
|
|
ModifyIndex: reply.ModifyIndex,
|
|
}
|
|
|
|
return out, nil
|
|
|
|
case "PUT":
|
|
var args structs.AutopilotSetConfigRequest
|
|
s.parseDC(req, &args.Datacenter)
|
|
s.parseToken(req, &args.Token)
|
|
|
|
conf := api.NewAutopilotConfiguration()
|
|
if err := decodeBody(req.Body, &conf); err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing autopilot config: %v", err)}
|
|
}
|
|
|
|
args.Config = structs.AutopilotConfig{
|
|
CleanupDeadServers: conf.CleanupDeadServers,
|
|
LastContactThreshold: conf.LastContactThreshold.Duration(),
|
|
MaxTrailingLogs: conf.MaxTrailingLogs,
|
|
MinQuorum: conf.MinQuorum,
|
|
ServerStabilizationTime: conf.ServerStabilizationTime.Duration(),
|
|
RedundancyZoneTag: conf.RedundancyZoneTag,
|
|
DisableUpgradeMigration: conf.DisableUpgradeMigration,
|
|
UpgradeVersionTag: conf.UpgradeVersionTag,
|
|
}
|
|
|
|
// Check for cas value
|
|
params := req.URL.Query()
|
|
if _, ok := params["cas"]; ok {
|
|
casVal, err := strconv.ParseUint(params.Get("cas"), 10, 64)
|
|
if err != nil {
|
|
return nil, HTTPError{StatusCode: http.StatusBadRequest, Reason: fmt.Sprintf("Error parsing cas value: %v", err)}
|
|
}
|
|
args.Config.ModifyIndex = casVal
|
|
args.CAS = true
|
|
}
|
|
|
|
var reply bool
|
|
if err := s.agent.RPC(req.Context(), "Operator.AutopilotSetConfiguration", &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Only use the out value if this was a CAS
|
|
if !args.CAS {
|
|
return true, nil
|
|
}
|
|
return reply, nil
|
|
|
|
default:
|
|
return nil, MethodNotAllowedError{req.Method, []string{"GET", "PUT"}}
|
|
}
|
|
}
|
|
|
|
// OperatorServerHealth is used to get the health of the servers in the local DC
|
|
func (s *HTTPHandlers) OperatorServerHealth(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
var args structs.DCSpecificRequest
|
|
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
|
|
return nil, nil
|
|
}
|
|
|
|
var reply structs.AutopilotHealthReply
|
|
if err := s.agent.RPC(req.Context(), "Operator.ServerHealth", &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Reply with status 429 if something is unhealthy
|
|
if !reply.Healthy {
|
|
resp.WriteHeader(http.StatusTooManyRequests)
|
|
}
|
|
|
|
out := &api.OperatorHealthReply{
|
|
Healthy: reply.Healthy,
|
|
FailureTolerance: reply.FailureTolerance,
|
|
}
|
|
for _, server := range reply.Servers {
|
|
out.Servers = append(out.Servers, api.ServerHealth{
|
|
ID: server.ID,
|
|
Name: server.Name,
|
|
Address: server.Address,
|
|
Version: server.Version,
|
|
Leader: server.Leader,
|
|
SerfStatus: server.SerfStatus.String(),
|
|
LastContact: api.NewReadableDuration(server.LastContact),
|
|
LastTerm: server.LastTerm,
|
|
LastIndex: server.LastIndex,
|
|
Healthy: server.Healthy,
|
|
Voter: server.Voter,
|
|
StableSince: server.StableSince.Round(time.Second).UTC(),
|
|
})
|
|
}
|
|
|
|
return out, nil
|
|
}
|
|
|
|
func (s *HTTPHandlers) OperatorAutopilotState(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
var args structs.DCSpecificRequest
|
|
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
|
|
return nil, nil
|
|
}
|
|
|
|
var reply autopilot.State
|
|
if err := s.agent.RPC(req.Context(), "Operator.AutopilotState", &args, &reply); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out := autopilotToAPIState(&reply)
|
|
return out, nil
|
|
}
|
|
|
|
func (s *HTTPHandlers) OperatorUsage(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
|
metrics.IncrCounterWithLabels([]string{"client", "api", "operator_usage"}, 1,
|
|
s.nodeMetricsLabels())
|
|
|
|
var args structs.OperatorUsageRequest
|
|
|
|
if err := s.parseEntMetaNoWildcard(req, &args.EnterpriseMeta); err != nil {
|
|
return nil, err
|
|
}
|
|
if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done {
|
|
return nil, nil
|
|
}
|
|
if _, ok := req.URL.Query()["global"]; ok {
|
|
args.Global = true
|
|
}
|
|
|
|
// Make the RPC request
|
|
var out structs.Usage
|
|
defer setMeta(resp, &out.QueryMeta)
|
|
RETRY_ONCE:
|
|
err := s.agent.RPC(req.Context(), "Operator.Usage", &args, &out)
|
|
if err != nil {
|
|
metrics.IncrCounterWithLabels([]string{"client", "rpc", "error", "operator_usage"}, 1,
|
|
s.nodeMetricsLabels())
|
|
return nil, err
|
|
}
|
|
if args.QueryOptions.AllowStale && args.MaxStaleDuration > 0 && args.MaxStaleDuration < out.LastContact {
|
|
args.AllowStale = false
|
|
args.MaxStaleDuration = 0
|
|
goto RETRY_ONCE
|
|
}
|
|
out.ConsistencyLevel = args.QueryOptions.ConsistencyLevel()
|
|
metrics.IncrCounterWithLabels([]string{"client", "api", "success", "operator_usage"}, 1,
|
|
s.nodeMetricsLabels())
|
|
return out, nil
|
|
}
|
|
|
|
func stringIDs(ids []raft.ServerID) []string {
|
|
out := make([]string, len(ids))
|
|
for i, id := range ids {
|
|
out[i] = string(id)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func autopilotToAPIState(state *autopilot.State) *api.AutopilotState {
|
|
out := &api.AutopilotState{
|
|
Healthy: state.Healthy,
|
|
FailureTolerance: state.FailureTolerance,
|
|
Leader: string(state.Leader),
|
|
Voters: stringIDs(state.Voters),
|
|
Servers: make(map[string]api.AutopilotServer),
|
|
}
|
|
|
|
for id, srv := range state.Servers {
|
|
out.Servers[string(id)] = autopilotToAPIServer(srv)
|
|
}
|
|
|
|
autopilotToAPIStateEnterprise(state, out)
|
|
|
|
return out
|
|
}
|
|
|
|
func autopilotToAPIServer(srv *autopilot.ServerState) api.AutopilotServer {
|
|
apiSrv := api.AutopilotServer{
|
|
ID: string(srv.Server.ID),
|
|
Name: srv.Server.Name,
|
|
Address: string(srv.Server.Address),
|
|
NodeStatus: string(srv.Server.NodeStatus),
|
|
Version: srv.Server.Version,
|
|
LastContact: api.NewReadableDuration(srv.Stats.LastContact),
|
|
LastTerm: srv.Stats.LastTerm,
|
|
LastIndex: srv.Stats.LastIndex,
|
|
Healthy: srv.Health.Healthy,
|
|
StableSince: srv.Health.StableSince,
|
|
Status: api.AutopilotServerStatus(srv.State),
|
|
Meta: srv.Server.Meta,
|
|
NodeType: api.AutopilotServerType(srv.Server.NodeType),
|
|
}
|
|
|
|
autopilotToAPIServerEnterprise(srv, &apiSrv)
|
|
|
|
return apiSrv
|
|
}
|