open-nomad/command/agent/operator_endpoint.go

275 lines
8.1 KiB
Go

package agent
import (
"net/http"
"strings"
"fmt"
"strconv"
"time"
"github.com/hashicorp/consul/agent/consul/autopilot"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/raft"
)
func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
path := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
switch {
case strings.HasPrefix(path, "configuration"):
return s.OperatorRaftConfiguration(resp, req)
case strings.HasPrefix(path, "peer"):
return s.OperatorRaftPeer(resp, req)
default:
return nil, CodedError(404, ErrInvalidMethod)
}
}
// OperatorRaftConfiguration is used to inspect the current Raft configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "GET" {
resp.WriteHeader(http.StatusMethodNotAllowed)
return nil, nil
}
var args structs.GenericRequest
if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
return nil, nil
}
var reply structs.RaftConfigurationResponse
if err := s.agent.RPC("Operator.RaftGetConfiguration", &args, &reply); err != nil {
return nil, err
}
return reply, nil
}
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
// removing peers by address.
func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "DELETE" {
return nil, CodedError(404, ErrInvalidMethod)
}
params := req.URL.Query()
_, hasID := params["id"]
_, hasAddress := params["address"]
if !hasID && !hasAddress {
return nil, CodedError(http.StatusBadRequest, "Must specify either ?id with the server's ID or ?address with IP:port of peer to remove")
}
if hasID && hasAddress {
return nil, CodedError(http.StatusBadRequest, "Must specify only one of ?id or ?address")
}
if hasID {
var args structs.RaftPeerByIDRequest
s.parseWriteRequest(req, &args.WriteRequest)
var reply struct{}
args.ID = raft.ServerID(params.Get("id"))
if err := s.agent.RPC("Operator.RaftRemovePeerByID", &args, &reply); err != nil {
return nil, err
}
} else {
var args structs.RaftPeerByAddressRequest
s.parseWriteRequest(req, &args.WriteRequest)
var reply struct{}
args.Address = raft.ServerAddress(params.Get("address"))
if err := s.agent.RPC("Operator.RaftRemovePeerByAddress", &args, &reply); err != nil {
return nil, err
}
}
return nil, nil
}
// OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Switch on the method
switch req.Method {
case "GET":
var args structs.GenericRequest
if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
return nil, nil
}
var reply structs.AutopilotConfig
if err := s.agent.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
return nil, err
}
out := api.AutopilotConfiguration{
CleanupDeadServers: reply.CleanupDeadServers,
LastContactThreshold: reply.LastContactThreshold,
MaxTrailingLogs: reply.MaxTrailingLogs,
ServerStabilizationTime: reply.ServerStabilizationTime,
EnableRedundancyZones: reply.EnableRedundancyZones,
DisableUpgradeMigration: reply.DisableUpgradeMigration,
EnableCustomUpgrades: reply.EnableCustomUpgrades,
CreateIndex: reply.CreateIndex,
ModifyIndex: reply.ModifyIndex,
}
return out, nil
case "PUT":
var args structs.AutopilotSetConfigRequest
s.parseWriteRequest(req, &args.WriteRequest)
var conf api.AutopilotConfiguration
if err := decodeBody(req, &conf); err != nil {
return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing autopilot config: %v", err))
}
args.Config = structs.AutopilotConfig{
CleanupDeadServers: conf.CleanupDeadServers,
LastContactThreshold: conf.LastContactThreshold,
MaxTrailingLogs: conf.MaxTrailingLogs,
ServerStabilizationTime: conf.ServerStabilizationTime,
EnableRedundancyZones: conf.EnableRedundancyZones,
DisableUpgradeMigration: conf.DisableUpgradeMigration,
EnableCustomUpgrades: conf.EnableCustomUpgrades,
}
// Check for cas value
params := req.URL.Query()
if _, ok := params["cas"]; ok {
casVal, err := strconv.ParseUint(params.Get("cas"), 10, 64)
if err != nil {
return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing cas value: %v", err))
}
args.Config.ModifyIndex = casVal
args.CAS = true
}
var reply bool
if err := s.agent.RPC("Operator.AutopilotSetConfiguration", &args, &reply); err != nil {
return nil, err
}
// Only use the out value if this was a CAS
if !args.CAS {
return true, nil
}
return reply, nil
default:
return nil, CodedError(404, ErrInvalidMethod)
}
}
// OperatorServerHealth is used to get the health of the servers in the given Region.
func (s *HTTPServer) OperatorServerHealth(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "GET" {
return nil, CodedError(404, ErrInvalidMethod)
}
var args structs.GenericRequest
if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
return nil, nil
}
var reply autopilot.OperatorHealthReply
if err := s.agent.RPC("Operator.ServerHealth", &args, &reply); err != nil {
return nil, err
}
// Reply with status 429 if something is unhealthy
if !reply.Healthy {
resp.WriteHeader(http.StatusTooManyRequests)
}
out := &api.OperatorHealthReply{
Healthy: reply.Healthy,
FailureTolerance: reply.FailureTolerance,
}
for _, server := range reply.Servers {
out.Servers = append(out.Servers, api.ServerHealth{
ID: server.ID,
Name: server.Name,
Address: server.Address,
Version: server.Version,
Leader: server.Leader,
SerfStatus: server.SerfStatus.String(),
LastContact: server.LastContact,
LastTerm: server.LastTerm,
LastIndex: server.LastIndex,
Healthy: server.Healthy,
Voter: server.Voter,
StableSince: server.StableSince.Round(time.Second).UTC(),
})
}
return out, nil
}
// OperatorSchedulerConfiguration is used to inspect the current Scheduler configuration.
// This supports the stale query mode in case the cluster doesn't have a leader.
func (s *HTTPServer) OperatorSchedulerConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Switch on the method
switch req.Method {
case "GET":
var args structs.GenericRequest
if done := s.parse(resp, req, &args.Region, &args.QueryOptions); done {
return nil, nil
}
var reply structs.SchedulerConfiguration
if err := s.agent.RPC("Operator.SchedulerGetConfiguration", &args, &reply); err != nil {
return nil, err
}
out := api.SchedulerConfiguration{
EnablePreemption: reply.EnablePreemption,
CreateIndex: reply.CreateIndex,
ModifyIndex: reply.ModifyIndex,
}
return out, nil
case "PUT":
var args structs.SchedulerSetConfigRequest
s.parseWriteRequest(req, &args.WriteRequest)
var conf api.SchedulerConfiguration
if err := decodeBody(req, &conf); err != nil {
return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing autopilot config: %v", err))
}
args.Config = structs.SchedulerConfiguration{
EnablePreemption: conf.EnablePreemption,
}
// Check for cas value
params := req.URL.Query()
if _, ok := params["cas"]; ok {
casVal, err := strconv.ParseUint(params.Get("cas"), 10, 64)
if err != nil {
return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing cas value: %v", err))
}
args.Config.ModifyIndex = casVal
args.CAS = true
}
var reply bool
if err := s.agent.RPC("Operator.SchedulerSetConfiguration", &args, &reply); err != nil {
return nil, err
}
// Only use the out value if this was a CAS
if !args.CAS {
return true, nil
}
return reply, nil
default:
return nil, CodedError(404, ErrInvalidMethod)
}
}