backport of commit 1339599185af9dbfcca6f0aa1001c6753b8c682b (#18517)
Co-authored-by: Gerard Nguyen <nguyenvanthao1991@gmail.com>
This commit is contained in:
parent
1425eecbbe
commit
c7b1966565
|
@ -0,0 +1,3 @@
|
|||
```release-note:feature
|
||||
cli: Add `-prune` flag to `nomad server force-leave` command
|
||||
```
|
22
api/agent.go
22
api/agent.go
|
@ -35,6 +35,12 @@ type KeyringRequest struct {
|
|||
Key string
|
||||
}
|
||||
|
||||
// ForceLeaveOpts carries the optional settings accepted by
// Agent.ForceLeaveWithOptions. The zero value requests a plain force-leave.
type ForceLeaveOpts struct {
	// Prune, when true, asks for the node to be removed from the list of
	// members rather than only being force-left.
	Prune bool
}
|
||||
|
||||
// Agent returns a new agent which can be used to query
|
||||
// the agent-specific endpoints.
|
||||
func (c *Client) Agent() *Agent {
|
||||
|
@ -163,7 +169,21 @@ func (a *Agent) MembersOpts(opts *QueryOptions) (*ServerMembers, error) {
|
|||
|
||||
// ForceLeave is used to eject an existing node from the cluster.
|
||||
func (a *Agent) ForceLeave(node string) error {
|
||||
_, err := a.client.put("/v1/agent/force-leave?node="+node, nil, nil, nil)
|
||||
v := url.Values{}
|
||||
v.Add("node", node)
|
||||
_, err := a.client.put("/v1/agent/force-leave?"+v.Encode(), nil, nil, nil)
|
||||
return err
|
||||
}
|
||||
|
||||
// ForceLeaveWithOptions is used to eject an existing node from the cluster
|
||||
// with additional options such as prune.
|
||||
func (a *Agent) ForceLeaveWithOptions(node string, opts ForceLeaveOpts) error {
|
||||
v := url.Values{}
|
||||
v.Add("node", node)
|
||||
if opts.Prune {
|
||||
v.Add("prune", "1")
|
||||
}
|
||||
_, err := a.client.put("/v1/agent/force-leave?"+v.Encode(), nil, nil, nil)
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,7 @@ func TestAgent_ForceLeave(t *testing.T) {
|
|||
must.One(t, n)
|
||||
|
||||
membersBefore, err := a.MembersOpts(&QueryOptions{})
|
||||
must.NoError(t, err)
|
||||
must.Eq(t, membersBefore.Members[1].Status, "alive")
|
||||
|
||||
err = a.ForceLeave(membersBefore.Members[1].Name)
|
||||
|
@ -152,6 +153,53 @@ func TestAgent_ForceLeave(t *testing.T) {
|
|||
wait.Timeout(3*time.Second),
|
||||
wait.Gap(100*time.Millisecond),
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
func TestAgent_ForceLeavePrune(t *testing.T) {
|
||||
testutil.Parallel(t)
|
||||
|
||||
c, s := makeClient(t, nil, nil)
|
||||
defer s.Stop()
|
||||
a := c.Agent()
|
||||
|
||||
nodeName := "foo"
|
||||
_, s2 := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
||||
c.NodeName = nodeName
|
||||
c.Server.BootstrapExpect = 0
|
||||
})
|
||||
|
||||
n, err := a.Join(s2.SerfAddr)
|
||||
must.NoError(t, err)
|
||||
must.One(t, n)
|
||||
membersBefore, err := a.MembersOpts(&QueryOptions{})
|
||||
must.NoError(t, err)
|
||||
|
||||
s2.Stop()
|
||||
|
||||
forceLeaveOpts := ForceLeaveOpts{
|
||||
Prune: true,
|
||||
}
|
||||
nodeName = nodeName + ".global"
|
||||
err = a.ForceLeaveWithOptions(nodeName, forceLeaveOpts)
|
||||
must.NoError(t, err)
|
||||
|
||||
f := func() error {
|
||||
membersAfter, err := a.MembersOpts(&QueryOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(membersAfter.Members) == len(membersBefore.Members) {
|
||||
return fmt.Errorf("node did not get pruned")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
must.Wait(t, wait.InitialSuccess(
|
||||
wait.ErrorFunc(f),
|
||||
wait.Timeout(5*time.Second),
|
||||
wait.Gap(100*time.Millisecond),
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
func (a *AgentMember) String() string {
|
||||
|
|
|
@ -322,8 +322,17 @@ func (s *HTTPServer) AgentForceLeaveRequest(resp http.ResponseWriter, req *http.
|
|||
return nil, CodedError(400, "missing node to force leave")
|
||||
}
|
||||
|
||||
prune, err := parseBool(req, "prune")
|
||||
if err != nil {
|
||||
return nil, CodedError(400, "invalid prune value")
|
||||
}
|
||||
|
||||
// Attempt remove
|
||||
err := srv.RemoveFailedNode(node)
|
||||
if prune != nil && *prune {
|
||||
err = srv.RemoveFailedNodePrune(node)
|
||||
} else {
|
||||
err = srv.RemoveFailedNode(node)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/api"
|
||||
"github.com/posener/complete"
|
||||
)
|
||||
|
||||
|
@ -21,14 +22,22 @@ Usage: nomad server force-leave [options] <node>
|
|||
Forces an server to enter the "left" state. This can be used to
|
||||
eject nodes which have failed and will not rejoin the cluster.
|
||||
Note that if the member is actually still alive, it will
|
||||
eventually rejoin the cluster again.
|
||||
eventually rejoin the cluster again. The failed or left server will
|
||||
be garbage collected after 24h.
|
||||
|
||||
If ACLs are enabled, this option requires a token with the 'agent:write'
|
||||
capability.
|
||||
|
||||
General Options:
|
||||
|
||||
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace)
|
||||
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
|
||||
|
||||
Server Force-Leave Options:
|
||||
|
||||
-prune
|
||||
Removes failed or left server from the Serf member list immediately.
|
||||
If member is actually still alive, it will eventually rejoin the cluster again.
|
||||
`
|
||||
return strings.TrimSpace(helpText)
|
||||
}
|
||||
|
||||
|
@ -37,7 +46,10 @@ func (c *ServerForceLeaveCommand) Synopsis() string {
|
|||
}
|
||||
|
||||
func (c *ServerForceLeaveCommand) AutocompleteFlags() complete.Flags {
|
||||
return c.Meta.AutocompleteFlags(FlagSetClient)
|
||||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||||
complete.Flags{
|
||||
"-prune": complete.PredictNothing,
|
||||
})
|
||||
}
|
||||
|
||||
func (c *ServerForceLeaveCommand) AutocompleteArgs() complete.Predictor {
|
||||
|
@ -47,8 +59,11 @@ func (c *ServerForceLeaveCommand) AutocompleteArgs() complete.Predictor {
|
|||
// Name returns the name of this command, "server force-leave". Run passes it
// to Meta.FlagSet when building the command's flag set.
func (c *ServerForceLeaveCommand) Name() string {
	return "server force-leave"
}
|
||||
|
||||
func (c *ServerForceLeaveCommand) Run(args []string) int {
|
||||
var prune bool
|
||||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
|
||||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
flags.BoolVar(&prune, "prune", false, "Remove server completely from list of members")
|
||||
|
||||
if err := flags.Parse(args); err != nil {
|
||||
return 1
|
||||
}
|
||||
|
@ -70,7 +85,10 @@ func (c *ServerForceLeaveCommand) Run(args []string) int {
|
|||
}
|
||||
|
||||
// Call force-leave on the node
|
||||
if err := client.Agent().ForceLeave(node); err != nil {
|
||||
forceLeaveOpts := api.ForceLeaveOpts{
|
||||
Prune: prune,
|
||||
}
|
||||
if err := client.Agent().ForceLeaveWithOptions(node, forceLeaveOpts); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error force-leaving server %s: %s", node, err))
|
||||
return 1
|
||||
}
|
||||
|
|
|
@ -1866,6 +1866,11 @@ func (s *Server) RemoveFailedNode(node string) error {
|
|||
return s.serf.RemoveFailedNode(node)
|
||||
}
|
||||
|
||||
// RemoveFailedNodePrune immediately removes a failed node from the list of members
|
||||
func (s *Server) RemoveFailedNodePrune(node string) error {
|
||||
return s.serf.RemoveFailedNodePrune(node)
|
||||
}
|
||||
|
||||
// KeyManager returns the Serf keyring manager
|
||||
func (s *Server) KeyManager() *serf.KeyManager {
|
||||
return s.serf.KeyManager()
|
||||
|
|
|
@ -441,13 +441,17 @@ The table below shows this endpoint's support for
|
|||
### Parameters
|
||||
|
||||
- `node` `(string: <required>)` - Specifies the name of the node to force leave.
|
||||
- `prune` `(boolean: <optional>)` - Removes a failed or left server from the Serf
|
||||
member list immediately. If the member is actually still alive, it will eventually rejoin
|
||||
the cluster again.
|
||||
|
||||
|
||||
### Sample Request
|
||||
|
||||
```shell-session
|
||||
$ curl \
|
||||
--request POST \
|
||||
https://localhost:4646/v1/agent/force-leave?node=client-ab2e23dc
|
||||
"https://localhost:4646/v1/agent/force-leave?node=client-ab2e23dc&prune=true"
|
||||
```
|
||||
|
||||
## Health
|
||||
|
|
|
@ -10,7 +10,9 @@ description: >
|
|||
|
||||
The `server force-leave` command forces a server to enter the "left" state.
|
||||
This can be used to eject server nodes which have failed and will not rejoin
|
||||
the cluster. Note that if the server is actually still alive, it will
|
||||
the cluster. The failed or left server will be garbage collected after `24h`.
|
||||
|
||||
~> Note that if the server is actually still alive, it will
|
||||
eventually rejoin the cluster again.
|
||||
|
||||
## Usage
|
||||
|
@ -22,6 +24,9 @@ nomad server force-leave [options] <node>
|
|||
This command expects only one argument - the node which should be forced
|
||||
to enter the "left" state.
|
||||
|
||||
Additionally, by specifying the `-prune` flag, a failed or left node can be forcibly removed
|
||||
from the list of members immediately.
|
||||
|
||||
If ACLs are enabled, this option requires a token with the `agent:write`
|
||||
capability.
|
||||
|
||||
|
@ -29,6 +34,11 @@ capability.
|
|||
|
||||
@include 'general_options_no_namespace.mdx'
|
||||
|
||||
## Server Force-Leave Options
|
||||
|
||||
- `-prune`: Removes a failed or left server from the Serf member list immediately.
|
||||
If the member is actually still alive, it will eventually rejoin the cluster again.
|
||||
|
||||
## Examples
|
||||
|
||||
Force-leave the server "node1":
|
||||
|
@ -37,3 +47,10 @@ Force-leave the server "node1":
|
|||
$ nomad server force-leave node1
|
||||
|
||||
```
|
||||
|
||||
Force-leave the server "node1" and prune it:
|
||||
|
||||
```shell-session
|
||||
$ nomad server force-leave -prune node1
|
||||
|
||||
```
|
Loading…
Reference in New Issue