Backport of [NET-3865] [Supportability] Additional Information in the output of 'consul operator raft list-peers' into release/1.16.x (#17738)

* backport of commit 2f94024b356f774d0032ad069babb1a3fce6cfbd

* backport of commit 7626d0992dd59f424d893dd462bbde3a9cb0646c

* backport of commit 79aabc958aed60b2bd97c836529d53a627262124

* backport of commit 44eee4168b5a0d405636dbcb5b14ee7959334550

* cherry-pick

* removed detailed flag

* fix change

* removed autopilot health

* removed tests

* removed detailed flag

---------

Co-authored-by: Ashesh Vidyut <ashesh.vidyut@hashicorp.com>
Co-authored-by: David Yu <dyu@hashicorp.com>
Co-authored-by: Ashesh Vidyut <134911583+absolutelightning@users.noreply.github.com>
This commit is contained in:
hc-github-team-consul-core 2023-06-21 01:56:48 -04:00 committed by GitHub
parent dc10fa5e25
commit 3bed38c630
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 67 additions and 10 deletions

3
.changelog/17582.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:feature
cli: `consul operator raft list-peers` command now shows each server's commit index and the number of commits by which each follower trails the leader, to aid in troubleshooting.
```

View File

@ -48,6 +48,12 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply
serverMap[raft.ServerAddress(addr)] = member
}
serverIDLastIndexMap := make(map[raft.ServerID]uint64)
for _, serverState := range op.srv.autopilot.GetState().Servers {
serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex
}
// Fill out the reply.
leader := op.srv.raft.Leader()
reply.Index = future.Index()
@ -66,6 +72,7 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply
Leader: server.Address == leader,
Voter: server.Suffrage == raft.Voter,
ProtocolVersion: raftProtocolVersion,
LastIndex: serverIDLastIndexMap[server.ID],
}
reply.Servers = append(reply.Servers, entry)
}

View File

@ -50,6 +50,13 @@ func TestOperator_RaftGetConfiguration(t *testing.T) {
if len(future.Configuration().Servers) != 1 {
t.Fatalf("bad: %v", future.Configuration().Servers)
}
serverIDLastIndexMap := make(map[raft.ServerID]uint64)
for _, serverState := range s1.autopilot.GetState().Servers {
serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex
}
me := future.Configuration().Servers[0]
expected := structs.RaftConfigurationResponse{
Servers: []*structs.RaftServer{
@ -60,6 +67,7 @@ func TestOperator_RaftGetConfiguration(t *testing.T) {
Leader: true,
Voter: true,
ProtocolVersion: "3",
LastIndex: serverIDLastIndexMap[me.ID],
},
},
Index: future.Index(),
@ -113,6 +121,10 @@ func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) {
if len(future.Configuration().Servers) != 1 {
t.Fatalf("bad: %v", future.Configuration().Servers)
}
serverIDLastIndexMap := make(map[raft.ServerID]uint64)
for _, serverState := range s1.autopilot.GetState().Servers {
serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex
}
me := future.Configuration().Servers[0]
expected := structs.RaftConfigurationResponse{
Servers: []*structs.RaftServer{
@ -123,6 +135,7 @@ func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) {
Leader: true,
Voter: true,
ProtocolVersion: "3",
LastIndex: serverIDLastIndexMap[me.ID],
},
},
Index: future.Index(),

View File

@ -34,6 +34,9 @@ type RaftServer struct {
// it's a non-voting server, which will be added in a future release of
// Consul.
Voter bool
// LastIndex is the last log index this server has a record of in its Raft log.
LastIndex uint64
}
// RaftConfigurationResponse is returned when querying for the current Raft

View File

@ -28,6 +28,9 @@ type RaftServer struct {
// it's a non-voting server, which will be added in a future release of
// Consul.
Voter bool
// LastIndex is the last log index this server has a record of in its Raft log.
LastIndex uint64
}
// RaftConfiguration is returned when querying for the current Raft configuration.

View File

@ -70,8 +70,24 @@ func raftListPeers(client *api.Client, stale bool) (string, error) {
return "", fmt.Errorf("Failed to retrieve raft configuration: %v", err)
}
leaderLastCommitIndex := uint64(0)
serverIdLastIndexMap := make(map[string]uint64)
for _, raftServer := range reply.Servers {
serverIdLastIndexMap[raftServer.ID] = raftServer.LastIndex
}
for _, s := range reply.Servers {
if s.Leader {
lastIndex, ok := serverIdLastIndexMap[s.ID]
if ok {
leaderLastCommitIndex = lastIndex
}
}
}
// Format it as a nice table.
result := []string{"Node\x1fID\x1fAddress\x1fState\x1fVoter\x1fRaftProtocol"}
result := []string{"Node\x1fID\x1fAddress\x1fState\x1fVoter\x1fRaftProtocol\x1fCommit Index\x1fTrails Leader By"}
for _, s := range reply.Servers {
raftProtocol := s.ProtocolVersion
@ -82,8 +98,20 @@ func raftListPeers(client *api.Client, stale bool) (string, error) {
if s.Leader {
state = "leader"
}
result = append(result, fmt.Sprintf("%s\x1f%s\x1f%s\x1f%s\x1f%v\x1f%s",
s.Node, s.ID, s.Address, state, s.Voter, raftProtocol))
trailsLeaderByText := "-"
serverLastIndex, ok := serverIdLastIndexMap[s.ID]
if ok {
trailsLeaderBy := leaderLastCommitIndex - serverLastIndex
trailsLeaderByText = fmt.Sprintf("%d commits", trailsLeaderBy)
if s.Leader {
trailsLeaderByText = "-"
} else if trailsLeaderBy == 1 {
trailsLeaderByText = fmt.Sprintf("%d commit", trailsLeaderBy)
}
}
result = append(result, fmt.Sprintf("%s\x1f%s\x1f%s\x1f%s\x1f%v\x1f%s\x1f%v\x1f%s",
s.Node, s.ID, s.Address, state, s.Voter, raftProtocol, serverLastIndex, trailsLeaderByText))
}
return columnize.Format(result, &columnize.Config{Delim: string([]byte{0x1f})}), nil

View File

@ -28,7 +28,7 @@ func TestOperatorRaftListPeersCommand(t *testing.T) {
a := agent.NewTestAgent(t, ``)
defer a.Shutdown()
expected := fmt.Sprintf("%s %s 127.0.0.1:%d leader true 3",
expected := fmt.Sprintf("%s %s 127.0.0.1:%d leader true 3 1 -",
a.Config.NodeName, a.Config.NodeID, a.Config.ServerPort)
// Test the list-peers subcommand directly

View File

@ -46,10 +46,10 @@ Usage: `consul operator raft list-peers -stale=[true|false]`
The output looks like this:
```text
Node ID Address State Voter RaftProtocol
alice 127.0.0.1:8300 127.0.0.1:8300 follower true 2
bob 127.0.0.2:8300 127.0.0.2:8300 leader true 3
carol 127.0.0.3:8300 127.0.0.3:8300 follower true 2
Node ID Address State Voter RaftProtocol Commit Index Trails Leader By
alice 127.0.0.1:8300 127.0.0.1:8300 follower true 2 1167 0 commits
bob 127.0.0.2:8300 127.0.0.2:8300 leader true 3 1167 -
carol 127.0.0.3:8300 127.0.0.3:8300 follower true 2 1159 8 commits
```
`Node` is the node name of the server, as known to Consul, or "(unknown)" if
@ -70,7 +70,7 @@ configuration.
- `-stale` - Enables non-leader servers to provide cluster state information.
If the cluster is in an outage state without a leader,
we recommend setting this option to `true.
we recommend setting this option to `true`.
Default is `false`.
## remove-peer
@ -109,7 +109,7 @@ The return code will indicate success or failure.
Corresponding HTTP API Endpoint: [\[POST\] /v1/operator/raft/transfer-leader](/consul/api-docs/operator/raft#transfer-raft-leadership)
This command transfers Raft leadership to another server agent. If an `id` is provided, Consul transfers leadership to the server with that id.
This command transfers Raft leadership to another server agent. If an `id` is provided, Consul transfers leadership to the server with that id.
Use this command to change leadership without restarting the leader node, which maintains quorum and workload capacity.