VAULT-11829: Add cluster status handler (#18351)
* go get link proto @vault-11829-meta-get-cluster-status
* add HA status
* add HAEnabled method
* add raft config
* allocate HA nodes based on actual count
* add raft autopilot status
* add raft quorum warnings
* add ClusterID method
* add StorageType
* add ClusterID
* update github.com/hashicorp/vault/vault/hcp_link/proto
* add changelog entry
* fix raft config panic
* remove "Warning" quorum message prefix
* add error wrapping
* add Core.HAStateWithLock method
* reduce quorum warnings to single string
* fix HCP_API_HOST test env var check
* Revert "fix HCP_API_HOST test env var check"

  This reverts commit 97c73c4798b77b84aea84f341f2c63c4d657914d.
This commit is contained in:
parent 42e8551fba
commit bb0c92afe7
@@ -0,0 +1,3 @@
+```release-note:improvement
+hcp/status: Add cluster-level status information
+```
go.mod (2 changes)
@@ -138,7 +138,7 @@ require (
 	github.com/hashicorp/vault/api/auth/approle v0.1.0
 	github.com/hashicorp/vault/api/auth/userpass v0.1.0
 	github.com/hashicorp/vault/sdk v0.6.1
-	github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d
+	github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342
 	github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab
 	github.com/jackc/pgx/v4 v4.15.0
 	github.com/jcmturner/gokrb5/v8 v8.4.2
go.sum (10 changes)
@@ -1173,6 +1173,16 @@ github.com/hashicorp/vault-testing-stepwise v0.1.2 h1:3obC/ziAPGnsz2IQxr5e4Ayb7t
 github.com/hashicorp/vault-testing-stepwise v0.1.2/go.mod h1:TeU6B+5NqxUjto+Zey+QQEH1iywuHn0ciHZNYh4q3uI=
 github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d h1:U692VbDl6ww5GQsNFClJVFJDaPeuqtDt1Mwqf21KYek=
 github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419 h1:yl6f//YTaTTGKJwyOpRe7v1DDPrzP+NErwgnef6qx7A=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94 h1:Rx4Q2/mOPqJuanzwZYttDkWjdibPv3UpvsvKmOkl6h4=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb h1:PgXcBszV61BvxD0wZzm4QCz9btgTWX74NO4be6S2afU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051 h1:cMQoRbIUMhbM0NsmP6hH3S3ZmAPVgic3g3L8Z55rXCI=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 h1:9cMwZnaAV/lKs8EZsvBF00wPt350wD3sg/xqWGeN4gM=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
 github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw=
 github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35nTu0ey1EXjwNwPjI9xErAsoOCmcMb9GKvyxo=
 github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
@@ -388,3 +388,7 @@ func (c *Core) SetClusterListenerAddrs(addrs []*net.TCPAddr) {
 func (c *Core) SetClusterHandler(handler http.Handler) {
 	c.clusterHandler = handler
 }
+
+func (c *Core) ClusterID() string {
+	return c.clusterID.Load()
+}
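
The new ClusterID accessor reads an atomic value, so callers such as the HCP link handler can fetch the ID without taking the core's state lock. A minimal standalone sketch of that pattern, assuming clusterID is an atomic string (modeled here with the standard library's atomic.Pointer; Vault's actual field type may differ):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// clusterIDHolder mimics the pattern behind c.clusterID.Load(): the ID
// is written once when the cluster is initialized and then read
// concurrently, without holding the core's state lock.
type clusterIDHolder struct {
	v atomic.Pointer[string]
}

func (h *clusterIDHolder) Store(id string) { h.v.Store(&id) }

func (h *clusterIDHolder) Load() string {
	if p := h.v.Load(); p != nil {
		return *p
	}
	return "" // not initialized yet, e.g. before unseal
}

func main() {
	var id clusterIDHolder
	fmt.Printf("before init: %q\n", id.Load()) // illustrative values only
	id.Store("00000000-0000-4000-8000-example")
	fmt.Printf("after init:  %q\n", id.Load())
}
```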
@@ -688,6 +688,13 @@ func (c *Core) HAState() consts.HAState {
 	}
 }
 
+func (c *Core) HAStateWithLock() consts.HAState {
+	c.stateLock.RLock()
+	defer c.stateLock.RUnlock()
+
+	return c.HAState()
+}
+
 // CoreConfig is used to parameterize a core
 type CoreConfig struct {
 	entCoreConfig
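
HAStateWithLock pairs the read lock with a deferred unlock and delegates to the lock-free HAState, so code paths that already hold stateLock keep calling HAState directly. A minimal sketch of that accessor convention (coreLike and its fields are illustrative, not Vault's real types):

```go
package main

import (
	"fmt"
	"sync"
)

// coreLike sketches the "WithLock" accessor convention: haState assumes
// the caller already holds the lock, while HAStateWithLock acquires the
// read lock itself and releases it via defer, making it safe for
// callers outside the core's locking discipline.
type coreLike struct {
	stateLock sync.RWMutex
	standby   bool
}

func (c *coreLike) haState() string { // caller must hold stateLock
	if c.standby {
		return "standby"
	}
	return "active"
}

func (c *coreLike) HAStateWithLock() string {
	c.stateLock.RLock()
	defer c.stateLock.RUnlock()
	return c.haState()
}

func main() {
	c := &coreLike{}
	fmt.Println(c.HAStateWithLock()) // "active"
}
```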
@@ -3699,3 +3706,26 @@ func (c *Core) GetHCPLinkStatus() (string, string) {
 
 	return status, resourceID
 }
+
+func (c *Core) HAEnabled() bool {
+	return c.ha != nil && c.ha.HAEnabled()
+}
+
+func (c *Core) GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) {
+	raftBackend := c.getRaftBackend()
+
+	if raftBackend == nil {
+		return nil, nil
+	}
+
+	return raftBackend.GetConfiguration(ctx)
+}
+
+func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) {
+	raftBackend := c.getRaftBackend()
+	if raftBackend == nil {
+		return nil, nil
+	}
+
+	return raftBackend.GetAutopilotServerState(ctx)
+}
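
Both Raft accessors return (nil, nil) when getRaftBackend finds no Raft backend, so a nil result means "integrated storage not in use" rather than a failure; GetClusterStatus below relies on this by nil-checking raftConfig and raftAutopilotState. A hypothetical caller, not part of this commit, assuming only the exported methods added above:

```go
package example

import (
	"context"

	"github.com/hashicorp/vault/vault"
)

// raftVoterCount is a hypothetical helper. ok reports whether Raft is
// in use at all; a nil configuration from GetRaftConfiguration is the
// "no Raft backend" case, not an error.
func raftVoterCount(ctx context.Context, c *vault.Core) (voters int, ok bool, err error) {
	cfg, err := c.GetRaftConfiguration(ctx)
	if err != nil {
		return 0, false, err
	}
	if cfg == nil {
		return 0, false, nil // integrated storage not in use
	}
	for _, srv := range cfg.Servers {
		if srv.Voter {
			voters++
		}
	}
	return voters, true, nil
}
```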
@@ -4,12 +4,14 @@ import (
 	"context"
 	"fmt"
 	"math"
+	"os"
 	"sync"
 	"time"
 
 	"github.com/hashicorp/go-hclog"
 	scada "github.com/hashicorp/hcp-scada-provider"
 	"github.com/hashicorp/vault/helper/namespace"
+	"github.com/hashicorp/vault/sdk/helper/consts"
 	"github.com/hashicorp/vault/vault"
 	"github.com/hashicorp/vault/vault/cluster"
 	"github.com/hashicorp/vault/vault/hcp_link/capabilities"
@@ -23,7 +25,7 @@ import (
 type hcpLinkMetaHandler struct {
 	meta.UnimplementedHCPLinkMetaServer
 
-	wrappedCore   internal.WrappedCoreListNamespacesMounts
+	wrappedCore   internal.WrappedCoreMeta
 	scadaProvider scada.SCADAProvider
 	logger        hclog.Logger
@@ -129,7 +131,7 @@ func (h *hcpLinkMetaHandler) ListNamespaces(ctx context.Context, req *meta.ListN
 func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMountsRequest) (*meta.ListMountsResponse, error) {
 	mountEntries, err := h.wrappedCore.ListMounts()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("unable to list secret mounts: %w", err)
 	}
 
 	var mounts []*meta.Mount
@@ -140,7 +142,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
 		if nsID != namespace.RootNamespaceID {
 			ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
 			if err != nil {
-				return nil, err
+				return nil, fmt.Errorf("unable to get namespace associated with secret mount: %w", err)
 			}
 
 			path = ns.Path + path
@@ -161,7 +163,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
 func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsRequest) (*meta.ListAuthResponse, error) {
 	authEntries, err := h.wrappedCore.ListAuths()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("unable to list auth mounts: %w", err)
 	}
 
 	var auths []*meta.Auth
@@ -172,7 +174,7 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
 		if nsID != namespace.RootNamespaceID {
 			ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
 			if err != nil {
-				return nil, err
+				return nil, fmt.Errorf("unable to get namespace associated with auth mount: %w", err)
 			}
 
 			path = ns.Path + path
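
These handlers now wrap errors with %w instead of returning them bare, which adds context for HCP-side logs while keeping the original error available to errors.Is and errors.As. A self-contained sketch of what the wrapping preserves (errBackend and listMounts are illustrative stand-ins for a failing wrappedCore call):

```go
package main

import (
	"errors"
	"fmt"
)

var errBackend = errors.New("backend unavailable")

func listMounts() error { return errBackend }

func main() {
	if err := listMounts(); err != nil {
		wrapped := fmt.Errorf("unable to list secret mounts: %w", err)
		fmt.Println(wrapped)                        // context for operators
		fmt.Println(errors.Is(wrapped, errBackend)) // true: chain preserved by %w
	}
}
```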
@@ -189,3 +191,113 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
 		Auths: auths,
 	}, nil
 }
+
+func (h *hcpLinkMetaHandler) GetClusterStatus(ctx context.Context, req *meta.GetClusterStatusRequest) (*meta.GetClusterStatusResponse, error) {
+	if h.wrappedCore.HAStateWithLock() != consts.Active {
+		return nil, fmt.Errorf("node not active")
+	}
+
+	hostname, err := os.Hostname()
+	if err != nil {
+		return nil, fmt.Errorf("unable to fetch hostname: %w", err)
+	}
+
+	haEnabled := h.wrappedCore.HAEnabled()
+	haStatus := &meta.HAStatus{
+		Enabled: haEnabled,
+	}
+
+	if haEnabled {
+		leader := &meta.HANode{
+			Hostname: hostname,
+		}
+
+		peers := h.wrappedCore.GetHAPeerNodesCached()
+
+		haNodes := make([]*meta.HANode, len(peers)+1)
+		haNodes[0] = leader
+
+		for i, peerNode := range peers {
+			haNodes[i+1] = &meta.HANode{
+				Hostname: peerNode.Hostname,
+			}
+		}
+
+		haStatus.Nodes = haNodes
+	}
+
+	raftStatus := &meta.RaftStatus{}
+	raftConfig, err := h.wrappedCore.GetRaftConfiguration(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get Raft configuration: %w", err)
+	}
+
+	if raftConfig != nil {
+		raftServers := make([]*meta.RaftServer, len(raftConfig.Servers))
+
+		var voterCount uint32
+		for i, srv := range raftConfig.Servers {
+			raftServers[i] = &meta.RaftServer{
+				NodeID:          srv.NodeID,
+				Address:         srv.Address,
+				Voter:           srv.Voter,
+				Leader:          srv.Leader,
+				ProtocolVersion: srv.ProtocolVersion,
+			}
+
+			if srv.Voter {
+				voterCount++
+			}
+		}
+
+		raftStatus.RaftConfiguration = &meta.RaftConfiguration{
+			Servers: raftServers,
+		}
+
+		evenVoterMessage := "Vault should have access to an odd number of voter nodes."
+		largeClusterMessage := "Very large cluster detected."
+		var quorumWarning string
+
+		if voterCount == 1 {
+			quorumWarning = "Only one server node found. Vault is not running in high availability mode."
+		} else if voterCount%2 == 0 && voterCount > 7 {
+			quorumWarning = evenVoterMessage + " " + largeClusterMessage
+		} else if voterCount%2 == 0 {
+			quorumWarning = evenVoterMessage
+		} else if voterCount > 7 {
+			quorumWarning = largeClusterMessage
+		}
+
+		raftStatus.QuorumWarning = quorumWarning
+	}
+
+	raftAutopilotState, err := h.wrappedCore.GetRaftAutopilotState(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get Raft Autopilot state: %w", err)
+	}
+
+	if raftAutopilotState != nil {
+		autopilotStatus := &meta.AutopilotStatus{
+			Healthy: raftAutopilotState.Healthy,
+		}
+
+		autopilotServers := make([]*meta.AutopilotServer, 0)
+		for _, srv := range raftAutopilotState.Servers {
+			autopilotServers = append(autopilotServers, &meta.AutopilotServer{
+				ID:      srv.ID,
+				Healthy: srv.Healthy,
+			})
+		}
+
+		raftStatus.AutopilotStatus = autopilotStatus
+	}
+
+	resp := &meta.GetClusterStatusResponse{
+		ClusterID:   h.wrappedCore.ClusterID(),
+		HAStatus:    haStatus,
+		RaftStatus:  raftStatus,
+		StorageType: h.wrappedCore.StorageType(),
+	}
+
+	return resp, nil
+}
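
The quorum warning is reduced to a single string computed from the voter count: one node means no HA, an even count risks split votes, and more than seven voters is flagged as a very large cluster. A standalone sketch of those rules, with the messages copied from the hunk above (the quorumWarning function and main are illustrative):

```go
package main

import "fmt"

// quorumWarning restates the decision table from GetClusterStatus:
// a single warning string derived from the number of Raft voters.
func quorumWarning(voterCount uint32) string {
	const (
		evenVoterMessage    = "Vault should have access to an odd number of voter nodes."
		largeClusterMessage = "Very large cluster detected."
	)
	switch {
	case voterCount == 1:
		return "Only one server node found. Vault is not running in high availability mode."
	case voterCount%2 == 0 && voterCount > 7:
		return evenVoterMessage + " " + largeClusterMessage
	case voterCount%2 == 0:
		return evenVoterMessage
	case voterCount > 7:
		return largeClusterMessage
	}
	return "" // odd-sized cluster of 3, 5, or 7 voters: no warning
}

func main() {
	for _, n := range []uint32{1, 2, 3, 8, 9} {
		fmt.Printf("%d voters: %q\n", n, quorumWarning(n))
	}
}
```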
@@ -4,6 +4,7 @@ import (
 	"context"
 
 	"github.com/hashicorp/vault/helper/namespace"
+	"github.com/hashicorp/vault/physical/raft"
 	"github.com/hashicorp/vault/sdk/helper/consts"
 	"github.com/hashicorp/vault/sdk/logical"
 	"github.com/hashicorp/vault/vault"
@@ -30,14 +31,21 @@ type WrappedCoreHCPToken interface {
 
 var _ WrappedCoreHCPToken = &vault.Core{}
 
-type WrappedCoreListNamespacesMounts interface {
+type WrappedCoreMeta interface {
 	NamespaceByID(ctx context.Context, nsID string) (*namespace.Namespace, error)
 	ListNamespaces(includePath bool) []*namespace.Namespace
 	ListMounts() ([]*vault.MountEntry, error)
 	ListAuths() ([]*vault.MountEntry, error)
+	HAEnabled() bool
+	HAStateWithLock() consts.HAState
+	GetHAPeerNodesCached() []vault.PeerNode
+	GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error)
+	GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error)
+	StorageType() string
+	ClusterID() string
 }
 
-var _ WrappedCoreListNamespacesMounts = &vault.Core{}
+var _ WrappedCoreMeta = &vault.Core{}
 
 type WrappedCoreHCPLinkStatus interface {
 	WrappedCoreStandbyStates
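
The renamed WrappedCoreMeta interface is checked against *vault.Core with a blank-identifier assignment, Go's idiom for a compile-time interface-satisfaction check: the line compiles to nothing at runtime but fails the build if any method is missing. A minimal sketch of the idiom with illustrative types:

```go
package main

// Status is an illustrative stand-in for an interface like
// WrappedCoreMeta.
type Status interface {
	ClusterID() string
	StorageType() string
}

type core struct{}

func (c *core) ClusterID() string   { return "example-id" }
func (c *core) StorageType() string { return "raft" }

// Fails to compile if *core ever stops satisfying Status.
var _ Status = (*core)(nil)

func main() {}
```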
@@ -796,7 +796,7 @@ func (c *Core) handleCancelableRequest(ctx context.Context, req *logical.Request
 	}
 
 	if walState.LocalIndex != 0 || walState.ReplicatedIndex != 0 {
-		walState.ClusterID = c.clusterID.Load()
+		walState.ClusterID = c.ClusterID()
 		if walState.LocalIndex == 0 {
 			if c.perfStandby {
 				walState.LocalIndex = LastRemoteWAL(c)
@@ -2343,7 +2343,7 @@ func (c *Core) checkSSCTokenInternal(ctx context.Context, token string, isPerfSt
 		return plainToken.Random, nil
 	}
 
-	requiredWalState := &logical.WALState{ClusterID: c.clusterID.Load(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
+	requiredWalState := &logical.WALState{ClusterID: c.ClusterID(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
 	if c.HasWALState(requiredWalState, isPerfStandby) {
 		return plainToken.Random, nil
 	}