Operator command/endpoint/documentation

This commit is contained in:
Alex Dadgar 2017-02-09 17:58:20 -08:00
parent 4da380bbbf
commit 2d4d9b79d8
26 changed files with 1179 additions and 17 deletions

81
api/operator.go Normal file
View File

@ -0,0 +1,81 @@
package api
// Operator can be used to perform low-level operator tasks for Nomad.
// A handle is obtained from Client.Operator.
type Operator struct {
// c is the API client through which every operator request is issued.
c *Client
}
// Operator builds a handle for the operator endpoints. The handle issues its
// requests through the client it was created from.
func (c *Client) Operator() *Operator {
	handle := &Operator{c: c}
	return handle
}
// RaftServer has information about a server in the Raft configuration.
// It is the element type of RaftConfiguration.Servers.
type RaftServer struct {
// ID is the unique ID for the server. These are currently the same
// as the address, but they will be changed to a real GUID in a future
// release of Nomad.
ID string
// Node is the node name of the server, as known by Nomad, or this
// will be set to "(unknown)" otherwise.
Node string
// Address is the IP:port of the server, used for Raft communications.
Address string
// Leader is true if this server is the current cluster leader.
Leader bool
// Voter is true if this server has a vote in the cluster. This might
// be false if the server is staging and still coming online, or if
// it's a non-voting server, which will be added in a future release of
// Nomad.
Voter bool
}
// RaftConfiguration is returned when querying for the current Raft
// configuration. It is the decoded result of Operator.RaftGetConfiguration.
type RaftConfiguration struct {
// Servers has the list of servers in the Raft configuration.
Servers []*RaftServer
// Index has the Raft index of this configuration.
Index uint64
}
// RaftGetConfiguration fetches the current Raft peer set by issuing a GET to
// /v1/operator/raft/configuration with the supplied query options. It returns
// the decoded configuration, or the error from the request or from decoding
// the response body.
func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) {
	req := op.c.newRequest("GET", "/v1/operator/raft/configuration")
	req.setQueryOptions(q)

	_, resp, err := requireOK(op.c.doRequest(req))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	config := new(RaftConfiguration)
	if decodeErr := decodeBody(resp, config); decodeErr != nil {
		return nil, decodeErr
	}
	return config, nil
}
// RaftRemovePeerByAddress is used to kick a stale peer (one that is in the
// Raft quorum but no longer known to Serf or the catalog) by address in the
// form of "IP:port". It issues a DELETE to /v1/operator/raft/peer and returns
// the request error, if any.
func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error {
	req := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
	req.setWriteOptions(q)

	// TODO (alexdadgar) Currently we made address a query parameter. Once
	// IDs are in place this will be DELETE /v1/operator/raft/peer/<id>.
	req.params.Set("address", address)

	_, resp, err := requireOK(op.c.doRequest(req))
	if err == nil {
		// Nothing is read from a successful response; just release it.
		resp.Body.Close()
	}
	return err
}

36
api/operator_test.go Normal file
View File

@ -0,0 +1,36 @@
package api
import (
"strings"
"testing"
)
// TestOperator_RaftGetConfiguration checks that the test server reports a
// Raft configuration with exactly one server that is both leader and voter.
func TestOperator_RaftGetConfiguration(t *testing.T) {
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	out, err := c.Operator().RaftGetConfiguration(nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(out.Servers) != 1 {
		t.Fatalf("bad: %v", out)
	}
	if only := out.Servers[0]; !(only.Leader && only.Voter) {
		t.Fatalf("bad: %v", out)
	}
}
// TestOperator_RaftRemovePeerByAddress removes a peer address that does not
// exist and expects the server's "not found" error, which shows the address
// was carried all the way through the request.
func TestOperator_RaftRemovePeerByAddress(t *testing.T) {
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	const want = "address \"nope\" was not found in the Raft configuration"

	err := c.Operator().RaftRemovePeerByAddress("nope", nil)
	if err == nil {
		t.Fatalf("err: %v", err)
	}
	if !strings.Contains(err.Error(), want) {
		t.Fatalf("err: %v", err)
	}
}

View File

@ -170,6 +170,8 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
s.mux.HandleFunc("/v1/status/leader", s.wrap(s.StatusLeaderRequest))
s.mux.HandleFunc("/v1/status/peers", s.wrap(s.StatusPeersRequest))
s.mux.HandleFunc("/v1/operator/", s.wrap(s.OperatorRequest))
s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))

View File

@ -0,0 +1,69 @@
package agent
import (
"net/http"
"strings"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/raft"
)
// OperatorRequest routes requests under /v1/operator/raft/ to the matching
// handler based on the path remainder: "configuration..." goes to
// OperatorRaftConfiguration and "peer..." to OperatorRaftPeer. Any other path
// yields a 404 coded error.
func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	rest := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
	if strings.HasPrefix(rest, "configuration") {
		return s.OperatorRaftConfiguration(resp, req)
	}
	if strings.HasPrefix(rest, "peer") {
		return s.OperatorRaftPeer(resp, req)
	}
	return nil, CodedError(404, ErrInvalidMethod)
}
// OperatorRaftConfiguration returns the current Raft configuration by calling
// the Operator.RaftGetConfiguration RPC. Only GET is accepted; any other
// method gets a 405 with no body. Query options are parsed from the request,
// which supports the stale query mode in case the cluster has no leader.
func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != "GET" {
		resp.WriteHeader(http.StatusMethodNotAllowed)
		return nil, nil
	}

	request := structs.GenericRequest{}
	if s.parse(resp, req, &request.Region, &request.QueryOptions) {
		return nil, nil
	}

	var configuration structs.RaftConfigurationResponse
	err := s.agent.RPC("Operator.RaftGetConfiguration", &request, &configuration)
	if err != nil {
		return nil, err
	}
	return configuration, nil
}
// OperatorRaftPeer supports actions on Raft peers. Currently we only support
// removing peers by address.
//
// Only DELETE is accepted; any other method gets a 405 with no body. The peer
// is named by the required "address" query parameter ("IP:port"). A missing or
// empty address is rejected with a 400 instead of being forwarded to the
// servers. On success nothing is returned; an RPC failure is returned as the
// error.
func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != "DELETE" {
		resp.WriteHeader(http.StatusMethodNotAllowed)
		return nil, nil
	}

	var args structs.RaftPeerByAddressRequest
	s.parseRegion(req, &args.Region)

	// Get yields "" both when the parameter is absent and when it is present
	// but empty; neither can identify a peer, so both are a client error.
	address := req.URL.Query().Get("address")
	if address == "" {
		resp.WriteHeader(http.StatusBadRequest)
		// Best effort: the status code is already sent, so a failed body
		// write cannot be reported to the client.
		resp.Write([]byte("Must specify ?address with IP:port of peer to remove"))
		return nil, nil
	}
	args.Address = raft.ServerAddress(address)

	// The RPC has no meaningful reply; an empty struct satisfies the interface.
	var reply struct{}
	if err := s.agent.RPC("Operator.RaftRemovePeerByAddress", &args, &reply); err != nil {
		return nil, err
	}
	return nil, nil
}

View File

@ -0,0 +1,58 @@
package agent
import (
"bytes"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/hashicorp/nomad/nomad/structs"
)
// TestHTTP_OperatorRaftConfiguration calls the configuration handler directly
// and expects a 200 with a single server that is both leader and voter.
func TestHTTP_OperatorRaftConfiguration(t *testing.T) {
	httpTest(t, nil, func(s *TestServer) {
		req, err := http.NewRequest("GET", "/v1/operator/raft/configuration", bytes.NewBuffer(nil))
		if err != nil {
			t.Fatalf("err: %v", err)
		}

		recorder := httptest.NewRecorder()
		obj, err := s.Server.OperatorRaftConfiguration(recorder, req)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
		if recorder.Code != 200 {
			t.Fatalf("bad code: %d", recorder.Code)
		}

		config, isConfig := obj.(structs.RaftConfigurationResponse)
		if !isConfig {
			t.Fatalf("unexpected: %T", obj)
		}
		if len(config.Servers) != 1 {
			t.Fatalf("bad: %v", config)
		}
		if first := config.Servers[0]; !(first.Leader && first.Voter) {
			t.Fatalf("bad: %v", config)
		}
	})
}
// TestHTTP_OperatorRaftPeer asks the peer handler to delete an address that is
// not in the Raft configuration. Getting the "not found" error back shows the
// address was passed all the way through.
func TestHTTP_OperatorRaftPeer(t *testing.T) {
	const want = "address \"nope\" was not found in the Raft configuration"

	httpTest(t, nil, func(s *TestServer) {
		req, err := http.NewRequest("DELETE", "/v1/operator/raft/peer?address=nope", bytes.NewBuffer(nil))
		if err != nil {
			t.Fatalf("err: %v", err)
		}

		recorder := httptest.NewRecorder()
		_, err = s.Server.OperatorRaftPeer(recorder, req)
		if err == nil {
			t.Fatalf("err: %v", err)
		}
		if !strings.Contains(err.Error(), want) {
			t.Fatalf("err: %v", err)
		}
	})
}

View File

@ -33,11 +33,11 @@ General Options:
Dispatch Options:
-meta <key>=<value>
Meta takes a key/value pair seperated by "=". The metadata key will be
merged into the job's metadata. The job may define a default value for the
key which is overriden when dispatching. The flag can be provided more than
once to inject multiple metadata key/value pairs. Arbitrary keys are not
allowed. The parameterized job must allow the key to be merged.
Meta takes a key/value pair seperated by "=". The metadata key will be
merged into the job's metadata. The job may define a default value for the
key which is overriden when dispatching. The flag can be provided more than
once to inject multiple metadata key/value pairs. Arbitrary keys are not
allowed. The parameterized job must allow the key to be merged.
-detach
Return immediately instead of entering monitor mode. After job dispatch,

32
command/operator.go Normal file
View File

@ -0,0 +1,32 @@
package command
import (
"strings"
"github.com/mitchellh/cli"
)
// OperatorCommand is the top-level "nomad operator" command. It has no
// behavior of its own beyond help output; the work is done by its subcommands.
type OperatorCommand struct {
// Meta is embedded to share the common command plumbing.
Meta
}
// Help returns the usage text for "nomad operator", with surrounding
// whitespace trimmed.
func (f *OperatorCommand) Help() string {
	return strings.TrimSpace(`
Usage: nomad operator <subcommand> [options]
Provides cluster-level tools for Nomad operators, such as interacting with
the Raft subsystem. NOTE: Use this command with extreme caution, as improper
use could lead to a Nomad outage and even loss of data.
Run nomad operator <subcommand> with no arguments for help on that subcommand.
`)
}
// Synopsis returns the one-line description of the operator command.
func (f *OperatorCommand) Synopsis() string {
	const synopsis = "Provides cluster-level tools for Nomad operators"
	return synopsis
}
// Run ignores its arguments and always returns cli.RunResultHelp.
// NOTE(review): presumably this tells the CLI framework to print the command's
// help text — confirm against mitchellh/cli.
func (f *OperatorCommand) Run(args []string) int {
return cli.RunResultHelp
}

30
command/operator_raft.go Normal file
View File

@ -0,0 +1,30 @@
package command
import (
"strings"
"github.com/mitchellh/cli"
)
// OperatorRaftCommand is the "nomad operator raft" command. It only provides
// help output; the work is done by its subcommands.
type OperatorRaftCommand struct {
// Meta is embedded to share the common command plumbing.
Meta
}
// Help returns the usage text for "nomad operator raft", with surrounding
// whitespace trimmed.
func (c *OperatorRaftCommand) Help() string {
	return strings.TrimSpace(`
Usage: nomad operator raft <subcommand> [options]
The Raft operator command is used to interact with Nomad's Raft subsystem. The
command can be used to verify Raft peers or in rare cases to recover quorum by
removing invalid peers.
`)
}
// Synopsis returns the one-line description of the raft command.
func (c *OperatorRaftCommand) Synopsis() string {
	const synopsis = "Provides access to the Raft subsystem"
	return synopsis
}
// Run ignores its arguments and always returns cli.RunResultHelp.
// NOTE(review): presumably this tells the CLI framework to print the command's
// help text — confirm against mitchellh/cli.
func (c *OperatorRaftCommand) Run(args []string) int {
return cli.RunResultHelp
}

View File

@ -0,0 +1,82 @@
package command
import (
"fmt"
"strings"
"github.com/hashicorp/nomad/api"
"github.com/ryanuber/columnize"
)
// OperatorRaftListCommand is the "nomad operator raft list-peers" command,
// which prints the current Raft peer configuration.
type OperatorRaftListCommand struct {
// Meta is embedded; Run uses it for the flag set, the API client and the Ui.
Meta
}
// Help returns the usage text for "nomad operator raft list-peers": the
// command description, the shared general options, and the command's own
// options, with surrounding whitespace trimmed.
func (c *OperatorRaftListCommand) Help() string {
	const header = `
Usage: nomad operator raft list-peers [options]
Displays the current Raft peer configuration.
General Options:
`
	const footer = `
List Peers Options:
-stale=[true|false]
The -stale argument defaults to "false" which means the leader provides the
result. If the cluster is in an outage state without a leader, you may need
to set -stale to "true" to get the configuration from a non-leader server.
`
	return strings.TrimSpace(header + generalOptionsUsage() + footer)
}
// Synopsis returns the one-line description of the list-peers command.
func (c *OperatorRaftListCommand) Synopsis() string {
	const synopsis = "Display the current Raft peer configuration"
	return synopsis
}
// Run parses the command-line flags, fetches the Raft configuration through
// the API client, and prints one table row per server. It returns 0 on
// success and 1 if flag parsing, client setup, or the query fails.
func (c *OperatorRaftListCommand) Run(args []string) int {
	var allowStale bool

	flagSet := c.Meta.FlagSet("raft", FlagSetClient)
	flagSet.Usage = func() { c.Ui.Output(c.Help()) }
	flagSet.BoolVar(&allowStale, "stale", false, "")
	if err := flagSet.Parse(args); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
		return 1
	}

	// Set up a client.
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Fetch the current configuration, honoring -stale.
	opts := &api.QueryOptions{AllowStale: allowStale}
	reply, err := client.Operator().RaftGetConfiguration(opts)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to retrieve raft configuration: %v", err))
		return 1
	}

	// Build a pipe-delimited table: a header row, then one row per server.
	rows := []string{"Node|ID|Address|State|Voter"}
	for _, server := range reply.Servers {
		role := "follower"
		if server.Leader {
			role = "leader"
		}
		row := fmt.Sprintf("%s|%s|%s|%s|%v",
			server.Node, server.ID, server.Address, role, server.Voter)
		rows = append(rows, row)
	}
	c.Ui.Output(columnize.SimpleFormat(rows))
	return 0
}

View File

@ -0,0 +1,30 @@
package command
import (
"strings"
"testing"
"github.com/mitchellh/cli"
)
// TestOperator_Raft_ListPeers_Implements is a compile-time check that
// *OperatorRaftListCommand satisfies cli.Command.
func TestOperator_Raft_ListPeers_Implements(t *testing.T) {
	var _ cli.Command = new(OperatorRaftListCommand)
}
// TestOperator_Raft_ListPeers runs list-peers against a test server and
// expects exit code 0 and output that mentions a leader.
func TestOperator_Raft_ListPeers(t *testing.T) {
	s, _, addr := testServer(t, nil)
	defer s.Stop()

	ui := new(cli.MockUi)
	cmd := &OperatorRaftListCommand{Meta: Meta{Ui: ui}}

	if code := cmd.Run([]string{"-address=" + addr}); code != 0 {
		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
	}
	if output := strings.TrimSpace(ui.OutputWriter.String()); !strings.Contains(output, "leader") {
		t.Fatalf("bad: %s", output)
	}
}

View File

@ -0,0 +1,79 @@
package command
import (
"fmt"
"strings"
"github.com/hashicorp/nomad/api"
)
// OperatorRaftRemoveCommand is the "nomad operator raft remove-peer" command,
// which removes a server from the Raft configuration by address.
type OperatorRaftRemoveCommand struct {
// Meta is embedded; Run uses it for the flag set, the API client and the Ui.
Meta
}
// Help returns the usage text for "nomad operator raft remove-peer": the
// command description, the shared general options, and the command's own
// options, with surrounding whitespace trimmed.
func (c *OperatorRaftRemoveCommand) Help() string {
	const header = `
Usage: nomad operator raft remove-peer [options]
Remove the Nomad server with given -peer-address from the Raft configuration.
There are rare cases where a peer may be left behind in the Raft quorum even
though the server is no longer present and known to the cluster. This command
can be used to remove the failed server so that it is no longer affects the Raft
quorum. If the server still shows in the output of the "nomad server-members"
command, it is preferable to clean up by simply running "nomad
server-force-leave" instead of this command.
General Options:
`
	const footer = `
Remove Peer Options:
-peer-address="IP:port"
Remove a Nomad server with given address from the Raft configuration.
`
	return strings.TrimSpace(header + generalOptionsUsage() + footer)
}
// Synopsis returns the one-line description of the remove-peer command.
func (c *OperatorRaftRemoveCommand) Synopsis() string {
	const synopsis = "Remove a Nomad server from the Raft configuration"
	return synopsis
}
// Run parses the command-line flags and asks the servers to remove the Raft
// peer named by -peer-address. It returns 0 on success and 1 if flag parsing
// fails, -peer-address is missing, the client cannot be set up, or the
// removal fails.
func (c *OperatorRaftRemoveCommand) Run(args []string) int {
	var peerAddress string

	flags := c.Meta.FlagSet("raft", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.StringVar(&peerAddress, "peer-address", "", "")
	if err := flags.Parse(args); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
		return 1
	}

	// Validate the required flag before building a client, so a usage
	// mistake is reported without any client setup.
	// TODO (alexdadgar) Once we expose IDs, add support for removing by ID.
	if peerAddress == "" {
		c.Ui.Error("an address is required for the peer to remove")
		return 1
	}

	// Set up a client.
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}
	operator := client.Operator()

	// Try to kick the peer.
	w := &api.WriteOptions{}
	if err := operator.RaftRemovePeerByAddress(peerAddress, w); err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to remove raft peer: %v", err))
		return 1
	}
	c.Ui.Output(fmt.Sprintf("Removed peer with address %q", peerAddress))
	return 0
}

View File

@ -0,0 +1,32 @@
package command
import (
"strings"
"testing"
"github.com/mitchellh/cli"
)
// TestOperator_Raft_RemovePeers_Implements is a compile-time check that
// *OperatorRaftRemoveCommand satisfies cli.Command.
func TestOperator_Raft_RemovePeers_Implements(t *testing.T) {
	var _ cli.Command = new(OperatorRaftRemoveCommand)
}
// TestOperator_Raft_RemovePeer runs remove-peer with an address that is not in
// the Raft configuration and expects exit code 1 plus the "not found" error.
func TestOperator_Raft_RemovePeer(t *testing.T) {
	s, _, addr := testServer(t, nil)
	defer s.Stop()

	ui := new(cli.MockUi)
	cmd := &OperatorRaftRemoveCommand{Meta: Meta{Ui: ui}}

	if code := cmd.Run([]string{"-address=" + addr, "-peer-address=nope"}); code != 1 {
		t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
	}

	// If we get this error, it proves we sent the address all the way through.
	const want = "address \"nope\" was not found in the Raft configuration"
	if output := strings.TrimSpace(ui.ErrorWriter.String()); !strings.Contains(output, want) {
		t.Fatalf("bad: %s", output)
	}
}

View File

@ -0,0 +1,11 @@
package command
import (
"testing"
"github.com/mitchellh/cli"
)
// TestOperator_Raft_Implements is a compile-time check that
// *OperatorRaftCommand satisfies cli.Command.
func TestOperator_Raft_Implements(t *testing.T) {
	var _ cli.Command = new(OperatorRaftCommand)
}

11
command/operator_test.go Normal file
View File

@ -0,0 +1,11 @@
package command
import (
"testing"
"github.com/mitchellh/cli"
)
// TestOperator_Implements is a compile-time check that *OperatorCommand
// satisfies cli.Command.
func TestOperator_Implements(t *testing.T) {
	var _ cli.Command = new(OperatorCommand)
}

View File

@ -115,6 +115,30 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
}, nil
},
"operator": func() (cli.Command, error) {
return &command.OperatorCommand{
Meta: meta,
}, nil
},
"operator raft": func() (cli.Command, error) {
return &command.OperatorRaftCommand{
Meta: meta,
}, nil
},
"operator raft list-peers": func() (cli.Command, error) {
return &command.OperatorRaftListCommand{
Meta: meta,
}, nil
},
"operator raft remove-peer": func() (cli.Command, error) {
return &command.OperatorRaftRemoveCommand{
Meta: meta,
}, nil
},
"plan": func() (cli.Command, error) {
return &command.PlanCommand{
Meta: meta,

View File

@ -39,6 +39,8 @@ func RunCustom(args []string, commands map[string]cli.CommandFactory) int {
switch k {
case "executor":
case "syslog":
case "operator raft", "operator raft list-peers", "operator raft remove-peer":
case "job dispatch":
case "fs ls", "fs cat", "fs stat":
case "check":
default:

107
nomad/operator_endpoint.go Normal file
View File

@ -0,0 +1,107 @@
package nomad
import (
"fmt"
"net"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
)
// Operator endpoint is used to perform low-level operator tasks for Nomad.
// Its methods are the RPC handlers behind the "Operator." method names.
type Operator struct {
// srv is the server whose Raft and Serf state the handlers read and modify.
srv *Server
}
// RaftGetConfiguration fills reply with the current Raft configuration: the
// configuration index plus one entry per server with its ID, address, node
// name, leader flag and voter flag. A server with no matching Serf member is
// reported with the node name "(unknown)". The request is first offered to
// forward; if forward handles it, its result is returned as-is.
func (op *Operator) RaftGetConfiguration(args *structs.GenericRequest, reply *structs.RaftConfigurationResponse) error {
	if done, err := op.srv.forward("Operator.RaftGetConfiguration", args, args, reply); done {
		return err
	}

	// We can't fetch the leader and the configuration atomically with
	// the current Raft API.
	configFuture := op.srv.raft.GetConfiguration()
	if err := configFuture.Error(); err != nil {
		return err
	}

	// Index the Serf members that are Nomad servers by their Raft address.
	membersByAddr := make(map[raft.ServerAddress]serf.Member)
	for _, m := range op.srv.serf.Members() {
		if ok, parts := isNomadServer(m); ok {
			tcpAddr := net.TCPAddr{IP: m.Addr, Port: parts.Port}
			membersByAddr[raft.ServerAddress(tcpAddr.String())] = m
		}
	}

	// Fill out the reply.
	currentLeader := op.srv.raft.Leader()
	reply.Index = configFuture.Index()
	for _, peer := range configFuture.Configuration().Servers {
		name := "(unknown)"
		if m, known := membersByAddr[peer.Address]; known {
			name = m.Name
		}
		reply.Servers = append(reply.Servers, &structs.RaftServer{
			ID:      peer.ID,
			Node:    name,
			Address: peer.Address,
			Leader:  peer.Address == currentLeader,
			Voter:   peer.Suffrage == raft.Voter,
		})
	}
	return nil
}
// RaftRemovePeerByAddress is used to kick a stale peer (one that is in the
// Raft quorum but no longer known to Serf or the catalog) by address in the
// form of "IP:port". The reply argument is not used, but it is required to
// fulfill the RPC interface. An address that is not in the current Raft
// configuration is rejected with an error.
func (op *Operator) RaftRemovePeerByAddress(args *structs.RaftPeerByAddressRequest, reply *struct{}) error {
	if done, err := op.srv.forward("Operator.RaftRemovePeerByAddress", args, args, reply); done {
		return err
	}

	// Since this is an operation designed for humans to use, we will return
	// an error if the supplied address isn't among the peers since it's
	// likely they screwed up.
	configFuture := op.srv.raft.GetConfiguration()
	if err := configFuture.Error(); err != nil {
		return err
	}
	present := false
	for _, peer := range configFuture.Configuration().Servers {
		if peer.Address == args.Address {
			present = true
			break
		}
	}
	if !present {
		return fmt.Errorf("address %q was not found in the Raft configuration",
			args.Address)
	}

	// The Raft library itself will prevent various forms of foot-shooting,
	// like making a configuration with no voters. Some consideration was
	// given here to adding more checks, but it was decided to make this as
	// low-level and direct as possible. We've got ACL coverage to lock this
	// down, and if you are an operator, it's assumed you know what you are
	// doing if you are calling this. If you remove a peer that's known to
	// Serf, for example, it will come back when the leader does a reconcile
	// pass.
	removal := op.srv.raft.RemovePeer(args.Address)
	if err := removal.Error(); err != nil {
		op.srv.logger.Printf("[WARN] nomad.operator: Failed to remove Raft peer %q: %v",
			args.Address, err)
		return err
	}

	op.srv.logger.Printf("[WARN] nomad.operator: Removed Raft peer %q", args.Address)
	return nil
}

View File

@ -0,0 +1,109 @@
package nomad
import (
"fmt"
"reflect"
"strings"
"testing"
"github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/raft"
)
// TestOperator_RaftGetConfiguration calls the RPC on a single test server and
// compares the reply with a response built from the server's own Raft
// configuration.
func TestOperator_RaftGetConfiguration(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	testutil.WaitForLeader(t, s1.RPC)

	req := structs.GenericRequest{
		QueryOptions: structs.QueryOptions{Region: s1.config.Region},
	}
	var got structs.RaftConfigurationResponse
	if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftGetConfiguration", &req, &got); err != nil {
		t.Fatalf("err: %v", err)
	}

	future := s1.raft.GetConfiguration()
	if err := future.Error(); err != nil {
		t.Fatalf("err: %v", err)
	}
	servers := future.Configuration().Servers
	if len(servers) != 1 {
		t.Fatalf("bad: %v", servers)
	}

	self := servers[0]
	want := structs.RaftConfigurationResponse{
		Servers: []*structs.RaftServer{
			{
				ID:      self.ID,
				Node:    fmt.Sprintf("%v.%v", s1.config.NodeName, s1.config.Region),
				Address: self.Address,
				Leader:  true,
				Voter:   true,
			},
		},
		Index: future.Index(),
	}
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("bad: got %+v; want %+v", got, want)
	}
}
// TestOperator_RaftRemovePeerByAddress checks that removing an unknown address
// fails, and that after adding the address to Raft by hand the same request
// succeeds and the peer is gone again.
func TestOperator_RaftRemovePeerByAddress(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	testutil.WaitForLeader(t, s1.RPC)

	// requireServerCount fails the test unless Raft currently lists exactly
	// want servers.
	requireServerCount := func(want int) {
		future := s1.raft.GetConfiguration()
		if err := future.Error(); err != nil {
			t.Fatalf("err: %v", err)
		}
		configuration := future.Configuration()
		if len(configuration.Servers) != want {
			t.Fatalf("bad: %v", configuration)
		}
	}

	// Try to remove a peer that's not there.
	arg := structs.RaftPeerByAddressRequest{
		Address: raft.ServerAddress(fmt.Sprintf("127.0.0.1:%d", getPort())),
	}
	arg.Region = s1.config.Region
	var reply struct{}
	err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply)
	if err == nil {
		t.Fatalf("err: %v", err)
	}
	if !strings.Contains(err.Error(), "not found in the Raft configuration") {
		t.Fatalf("err: %v", err)
	}

	// Add it manually to Raft.
	if addErr := s1.raft.AddPeer(arg.Address).Error(); addErr != nil {
		t.Fatalf("err: %v", addErr)
	}

	// Make sure it's there.
	requireServerCount(2)

	// Remove it, now it should go through.
	if err := msgpackrpc.CallWithCodec(codec, "Operator.RaftRemovePeerByAddress", &arg, &reply); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Make sure it's not there.
	requireServerCount(1)
}

View File

@ -162,6 +162,7 @@ type endpoints struct {
Region *Region
Periodic *Periodic
System *System
Operator *Operator
}
// NewServer is used to construct a new Nomad server from the
@ -639,25 +640,27 @@ func (s *Server) setupVaultClient() error {
// setupRPC is used to setup the RPC listener
func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
// Create endpoints
s.endpoints.Status = &Status{s}
s.endpoints.Node = &Node{srv: s}
s.endpoints.Job = &Job{s}
s.endpoints.Eval = &Eval{s}
s.endpoints.Plan = &Plan{s}
s.endpoints.Alloc = &Alloc{s}
s.endpoints.Region = &Region{s}
s.endpoints.Eval = &Eval{s}
s.endpoints.Job = &Job{s}
s.endpoints.Node = &Node{srv: s}
s.endpoints.Operator = &Operator{s}
s.endpoints.Periodic = &Periodic{s}
s.endpoints.Plan = &Plan{s}
s.endpoints.Region = &Region{s}
s.endpoints.Status = &Status{s}
s.endpoints.System = &System{s}
// Register the handlers
s.rpcServer.Register(s.endpoints.Status)
s.rpcServer.Register(s.endpoints.Node)
s.rpcServer.Register(s.endpoints.Job)
s.rpcServer.Register(s.endpoints.Eval)
s.rpcServer.Register(s.endpoints.Plan)
s.rpcServer.Register(s.endpoints.Alloc)
s.rpcServer.Register(s.endpoints.Region)
s.rpcServer.Register(s.endpoints.Eval)
s.rpcServer.Register(s.endpoints.Job)
s.rpcServer.Register(s.endpoints.Node)
s.rpcServer.Register(s.endpoints.Operator)
s.rpcServer.Register(s.endpoints.Periodic)
s.rpcServer.Register(s.endpoints.Plan)
s.rpcServer.Register(s.endpoints.Region)
s.rpcServer.Register(s.endpoints.Status)
s.rpcServer.Register(s.endpoints.System)
list, err := net.ListenTCP("tcp", s.config.RPCAddr)

49
nomad/structs/operator.go Normal file
View File

@ -0,0 +1,49 @@
package structs
import (
"github.com/hashicorp/raft"
)
// RaftServer has information about a server in the Raft configuration.
// It is the element type of RaftConfigurationResponse.Servers.
type RaftServer struct {
// ID is the unique ID for the server. These are currently the same
// as the address, but they will be changed to a real GUID in a future
// release of Nomad.
ID raft.ServerID
// Node is the node name of the server, as known by Nomad, or this
// will be set to "(unknown)" otherwise.
Node string
// Address is the IP:port of the server, used for Raft communications.
Address raft.ServerAddress
// Leader is true if this server is the current cluster leader.
Leader bool
// Voter is true if this server has a vote in the cluster. This might
// be false if the server is staging and still coming online, or if
// it's a non-voting server, which will be added in a future release of
// Nomad.
Voter bool
}
// RaftConfigurationResponse is returned when querying for the current Raft
// configuration.
type RaftConfigurationResponse struct {
// Servers has the list of servers in the Raft configuration.
Servers []*RaftServer
// Index has the Raft index of this configuration.
Index uint64
}
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
// operation on a specific Raft peer by address in the form of "IP:port".
// It is the argument type of the Operator.RaftRemovePeerByAddress RPC.
type RaftPeerByAddressRequest struct {
// Address is the peer to remove, in the form "IP:port".
Address raft.ServerAddress
// WriteRequest holds the Region for this request.
WriteRequest
}

View File

@ -0,0 +1,31 @@
---
layout: "docs"
page_title: "Commands: operator"
sidebar_current: "docs-commands-operator"
description: >
The operator command provides cluster-level tools for Nomad operators.
---
# Nomad Operator
Command: `nomad operator`
The `operator` command provides cluster-level tools for Nomad operators, such
as interacting with the Raft subsystem. This was added in Nomad 0.5.5.
~> Use this command with extreme caution, as improper use could lead to a Nomad
outage and even loss of data.
See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
this command is used. For an API to perform these operations programmatically,
please see the documentation for the [Operator](/docs/http/operator.html)
endpoint.
## Usage
Usage: `nomad operator <subcommand> <subcommand> [options]`
Run `nomad operator <subcommand>` with no arguments for help on that subcommand.
The following subcommands are available:
* `raft` - View and modify Nomad's Raft configuration.

View File

@ -0,0 +1,62 @@
---
layout: "docs"
page_title: "Commands: operator raft list-peers"
sidebar_current: "docs-commands-operator-raft-list-peers"
description: >
Display the current Raft peer configuration.
---
# Command: `operator raft list-peers`
The Raft list-peers command is used to display the current Raft peer
configuration.
See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
this command is used. For an API to perform these operations programmatically,
please see the documentation for the [Operator](/docs/http/operator.html)
endpoint.
## Usage
```
nomad operator raft list-peers [options]
```
## General Options
<%= partial "docs/commands/_general_options" %>
## List Peers Options
* `-stale`: The stale argument defaults to "false" which means the leader
provides the result. If the cluster is in an outage state without a leader, you
may need to set `-stale` to "true" to get the configuration from a non-leader
server.
## Examples
An example output with three servers is as follows:
```
$ nomad operator raft list-peers
Node ID Address State Voter
nomad-server01.global 10.10.11.5:4647 10.10.11.5:4647 follower true
nomad-server02.global 10.10.11.6:4647 10.10.11.6:4647 leader true
nomad-server03.global 10.10.11.7:4647 10.10.11.7:4647 follower true
```
* `Node` is the node name of the server, as known to Nomad, or "(unknown)" if
the node is stale and not known.
* `ID` is the ID of the server. This is the same as the `Address` but may be
upgraded to a GUID in a future version of Nomad.
* `Address` is the IP:port for the server.
* `State` is either "follower" or "leader" depending on the server's role in the
Raft configuration.
* `Voter` is "true" or "false", indicating if the server has a vote in the Raft
configuration. Future versions of Nomad may add support for non-voting servers.

View File

@ -0,0 +1,41 @@
---
layout: "docs"
page_title: "Commands: operator raft remove-peer"
sidebar_current: "docs-commands-operator-raft-remove-peer"
description: >
Remove a Nomad server from the Raft configuration.
---
# Command: `operator raft remove-peer`
Remove the Nomad server with given address from the Raft configuration.
There are rare cases where a peer may be left behind in the Raft quorum even
though the server is no longer present and known to the cluster. This command
can be used to remove the failed server so that it no longer affects the Raft
quorum. If the server still shows in the output of the [`nomad
server-members`](/docs/commands/server-members.html) command, it is preferable
to clean up by simply running [`nomad
server-force-leave`](/docs/commands/server-force-leave.html) instead of this
command.
See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
this command is used. For an API to perform these operations programmatically,
please see the documentation for the [Operator](/docs/http/operator.html)
endpoint.
## Usage
```
nomad operator raft remove-peer [options]
```
## General Options
<%= partial "docs/commands/_general_options" %>
## Remove Peer Options
* `-peer-address`: Remove a Nomad server with given address from the Raft
configuration. The format is "IP:port".

View File

@ -0,0 +1,166 @@
---
layout: "http"
page_title: "HTTP API: /v1/operator/"
sidebar_current: "docs-http-operator"
description: >
The '/v1/operator/' endpoints provide cluster-level tools for Nomad
operators.
---
# /v1/operator
The Operator endpoint provides cluster-level tools for Nomad operators, such
as interacting with the Raft subsystem. This was added in Nomad 0.5.5.
~> Use this interface with extreme caution, as improper use could lead to a
Nomad outage and even loss of data.
See the [Outage Recovery](/docs/guides/outage.html) guide for some examples of how
these capabilities are used. For a CLI to perform these operations manually, please
see the documentation for the [`nomad operator`](/docs/commands/operator-index.html)
command.
By default, the agent's local region is used; another region can be specified
using the `?region=` query parameter.
## GET
<dl>
<dt>Description</dt>
<dd>
Query the current Raft peer configuration.
</dd>
<dt>Method</dt>
<dd>GET</dd>
<dt>URL</dt>
<dd>`/v1/operator/raft/configuration`</dd>
<dt>Parameters</dt>
<dd>
<ul>
<li>
<span class="param">stale</span>
<span class="param-flags">optional</span>
If the cluster doesn't currently have a leader an error will be
returned. You can use the `?stale` query parameter to read the Raft
configuration from any of the Nomad servers.
</li>
</ul>
</dd>
<dt>Returns</dt>
<dd>
```javascript
{
"Servers": [
{
"ID": "127.0.0.1:4647",
"Node": "alice",
"Address": "127.0.0.1:4647",
"Leader": true,
"Voter": true
},
{
"ID": "127.0.0.2:4647",
"Node": "bob",
"Address": "127.0.0.2:4647",
"Leader": false,
"Voter": true
},
{
"ID": "127.0.0.3:4647",
"Node": "carol",
"Address": "127.0.0.3:4647",
"Leader": false,
"Voter": true
}
],
"Index": 22
}
```
</dd>
<dt>Field Reference</dt>
<dd>
<ul>
<li>
<span class="param">Servers</span>
The returned `Servers` array has information about the servers in the Raft
peer configuration. See the `Server` block for a description of its fields:
</li>
<li>
<span class="param">Index</span>
The `Index` value is the Raft index corresponding to this configuration. The
latest configuration may not yet be committed if changes are in flight.
</li>
</ul>
`Server` Fields:
<ul>
<li>
<span class="param">ID</span>
`ID` is the ID of the server. This is the same as the `Address` but may
be upgraded to a GUID in a future version of Nomad.
</li>
<li>
<span class="param">Node</span>
`Node` is the node name of the server, as known to Nomad, or "(unknown)" if
the node is stale and not known.
</li>
<li>
<span class="param">Address</span>
`Address` is the IP:port for the server.
</li>
<li>
<span class="param">Leader</span>
`Leader` is either "true" or "false" depending on the server's role in the
Raft configuration.
</li>
<li>
<span class="param">Voter</span>
`Voter` is "true" or "false", indicating if the server has a vote in the Raft
configuration. Future versions of Nomad may add support for non-voting servers.
</li>
</ul>
</dd>
</dl>
## DELETE
<dl>
<dt>Description</dt>
<dd>
Remove the Nomad server with given address from the Raft configuration. The
return code signifies success or failure.
</dd>
<dt>Method</dt>
<dd>DELETE</dd>
<dt>URL</dt>
<dd>`/v1/operator/raft/peer`</dd>
<dt>Parameters</dt>
<dd>
<ul>
<li>
<span class="param">address</span>
<span class="param-flags">required</span>
The address specifies the server to remove and is given as an `IP:port`.
The port number is usually 4647, unless configured otherwise. Nothing is
required in the body of the request.
</li>
</ul>
</dd>
<dt>Returns</dt>
<dd>None</dd>
</dl>

View File

@ -228,6 +228,17 @@
<li<%= sidebar_current("docs-commands-node-status") %>>
<a href="/docs/commands/node-status.html">node-status</a>
</li>
<li<%= sidebar_current("docs-commands-operator") %>>
<a href="/docs/commands/operator-index.html">operator</a>
<ul class="nav">
<li<%= sidebar_current("docs-commands-operator-raft-list-peers") %>>
<a href="/docs/commands/operator-raft-list-peers.html">raft list-peers</a>
</li>
<li<%= sidebar_current("docs-commands-operator-raft-remove-peer") %>>
<a href="/docs/commands/operator-raft-remove-peer.html">raft remove-peer</a>
</li>
</ul>
</li>
<li<%= sidebar_current("docs-commands-plan") %>>
<a href="/docs/commands/plan.html">plan</a>
</li>

View File

@ -117,6 +117,10 @@
<a href="/docs/http/status.html">Status</a>
</li>
<li<%= sidebar_current("docs-http-operator") %>>
<a href="/docs/http/operator.html">Operator</a>
</li>
<li<%= sidebar_current("docs-http-system") %>>
<a href="/docs/http/system.html">System</a>
</li>