VAULT-11829: Add cluster status handler (#18351)

* go get link proto @vault-11829-meta-get-cluster-status

* add HA status

* add HAEnabled method

* add raft config

* allocate HA nodes based on actual count

* add raft autopilot status

* add raft quorum warnings

* add ClusterID method

* add StorageType

* add ClusterID

* update github.com/hashicorp/vault/vault/hcp_link/proto

* add changelog entry

* fix raft config panic

* remove "Warning" quorum message prefix

* add error wrapping

* add Core.HAStateWithLock method

* reduce quorum warnings to single string

* fix HCP_API_HOST test env var check

* Revert "fix HCP_API_HOST test env var check"

This reverts commit 97c73c4798b77b84aea84f341f2c63c4d657914d.
Chris Capurso authored on 2023-01-06 17:06:54 -05:00, committed by GitHub
commit bb0c92afe7 (parent 42e8551fba)
8 changed files with 177 additions and 10 deletions

changelog/18351.txt (new file, 3 lines changed)

@@ -0,0 +1,3 @@
```release-note:improvement
hcp/status: Add cluster-level status information
```

go.mod (2 lines changed)

@@ -138,7 +138,7 @@ require (
github.com/hashicorp/vault/api/auth/approle v0.1.0
github.com/hashicorp/vault/api/auth/userpass v0.1.0
github.com/hashicorp/vault/sdk v0.6.1
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342
github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab
github.com/jackc/pgx/v4 v4.15.0
github.com/jcmturner/gokrb5/v8 v8.4.2

go.sum (10 lines changed)

@@ -1173,6 +1173,16 @@ github.com/hashicorp/vault-testing-stepwise v0.1.2 h1:3obC/ziAPGnsz2IQxr5e4Ayb7t
github.com/hashicorp/vault-testing-stepwise v0.1.2/go.mod h1:TeU6B+5NqxUjto+Zey+QQEH1iywuHn0ciHZNYh4q3uI=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d h1:U692VbDl6ww5GQsNFClJVFJDaPeuqtDt1Mwqf21KYek=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419 h1:yl6f//YTaTTGKJwyOpRe7v1DDPrzP+NErwgnef6qx7A=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94 h1:Rx4Q2/mOPqJuanzwZYttDkWjdibPv3UpvsvKmOkl6h4=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb h1:PgXcBszV61BvxD0wZzm4QCz9btgTWX74NO4be6S2afU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051 h1:cMQoRbIUMhbM0NsmP6hH3S3ZmAPVgic3g3L8Z55rXCI=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 h1:9cMwZnaAV/lKs8EZsvBF00wPt350wD3sg/xqWGeN4gM=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw=
github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35nTu0ey1EXjwNwPjI9xErAsoOCmcMb9GKvyxo=
github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=

View File

@@ -388,3 +388,7 @@ func (c *Core) SetClusterListenerAddrs(addrs []*net.TCPAddr) {
func (c *Core) SetClusterHandler(handler http.Handler) {
c.clusterHandler = handler
}
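// ClusterID returns the ID of the current Vault cluster.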
func (c *Core) ClusterID() string {
return c.clusterID.Load()
}

View File

@@ -688,6 +688,13 @@ func (c *Core) HAState() consts.HAState {
}
}
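// HAStateWithLock returns the current HA state while holding the state read lock.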
func (c *Core) HAStateWithLock() consts.HAState {
	c.stateLock.RLock()
	defer c.stateLock.RUnlock()

	return c.HAState()
}
// CoreConfig is used to parameterize a core
type CoreConfig struct {
entCoreConfig
@@ -3699,3 +3706,26 @@ func (c *Core) GetHCPLinkStatus() (string, string) {
return status, resourceID
}
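// HAEnabled reports whether an HA backend is configured and has high availability enabled.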
func (c *Core) HAEnabled() bool {
return c.ha != nil && c.ha.HAEnabled()
}
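// GetRaftConfiguration returns the current Raft server configuration, or nil if no Raft backend is in use.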
func (c *Core) GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) {
raftBackend := c.getRaftBackend()
if raftBackend == nil {
return nil, nil
}
return raftBackend.GetConfiguration(ctx)
}
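// GetRaftAutopilotState returns the current Raft Autopilot server state, or nil if no Raft backend is in use.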
func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) {
raftBackend := c.getRaftBackend()
if raftBackend == nil {
return nil, nil
}
return raftBackend.GetAutopilotServerState(ctx)
}

View File

@@ -4,12 +4,14 @@ import (
"context"
"fmt"
"math"
"os"
"sync"
"time"
"github.com/hashicorp/go-hclog"
scada "github.com/hashicorp/hcp-scada-provider"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/vault"
"github.com/hashicorp/vault/vault/cluster"
"github.com/hashicorp/vault/vault/hcp_link/capabilities"
@@ -23,7 +25,7 @@ import (
type hcpLinkMetaHandler struct {
meta.UnimplementedHCPLinkMetaServer
wrappedCore internal.WrappedCoreListNamespacesMounts
wrappedCore internal.WrappedCoreMeta
scadaProvider scada.SCADAProvider
logger hclog.Logger
@@ -129,7 +131,7 @@ func (h *hcpLinkMetaHandler) ListNamespaces(ctx context.Context, req *meta.ListN
func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMountsRequest) (*meta.ListMountsResponse, error) {
mountEntries, err := h.wrappedCore.ListMounts()
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to list secret mounts: %w", err)
}
var mounts []*meta.Mount
@@ -140,7 +142,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
if nsID != namespace.RootNamespaceID {
ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to get namespace associated with secret mount: %w", err)
}
path = ns.Path + path
@@ -161,7 +163,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsRequest) (*meta.ListAuthResponse, error) {
authEntries, err := h.wrappedCore.ListAuths()
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to list auth mounts: %w", err)
}
var auths []*meta.Auth
@@ -172,7 +174,7 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
if nsID != namespace.RootNamespaceID {
ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to get namespace associated with auth mount: %w", err)
}
path = ns.Path + path
@@ -189,3 +191,113 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
Auths: auths,
}, nil
}
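// GetClusterStatus reports cluster-level status for the HCP link meta capability:
// HA node membership, Raft configuration, Raft Autopilot health, and any quorum
// warnings. Only the active node answers this request.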
func (h *hcpLinkMetaHandler) GetClusterStatus(ctx context.Context, req *meta.GetClusterStatusRequest) (*meta.GetClusterStatusResponse, error) {
if h.wrappedCore.HAStateWithLock() != consts.Active {
return nil, fmt.Errorf("node not active")
}
hostname, err := os.Hostname()
if err != nil {
return nil, fmt.Errorf("unable to fetch hostname: %w", err)
}
haEnabled := h.wrappedCore.HAEnabled()
haStatus := &meta.HAStatus{
Enabled: haEnabled,
}
if haEnabled {
leader := &meta.HANode{
Hostname: hostname,
}
peers := h.wrappedCore.GetHAPeerNodesCached()
haNodes := make([]*meta.HANode, len(peers)+1)
haNodes[0] = leader
for i, peerNode := range peers {
haNodes[i+1] = &meta.HANode{
Hostname: peerNode.Hostname,
}
}
haStatus.Nodes = haNodes
}
raftStatus := &meta.RaftStatus{}
raftConfig, err := h.wrappedCore.GetRaftConfiguration(ctx)
if err != nil {
return nil, fmt.Errorf("unable to get Raft configuration: %w", err)
}
if raftConfig != nil {
raftServers := make([]*meta.RaftServer, len(raftConfig.Servers))
var voterCount uint32
for i, srv := range raftConfig.Servers {
raftServers[i] = &meta.RaftServer{
NodeID: srv.NodeID,
Address: srv.Address,
Voter: srv.Voter,
Leader: srv.Leader,
ProtocolVersion: srv.ProtocolVersion,
}
if srv.Voter {
voterCount++
}
}
raftStatus.RaftConfiguration = &meta.RaftConfiguration{
Servers: raftServers,
}
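		// Derive a quorum warning: a single voter means no real high availability,
		// an even voter count is discouraged, and more than 7 voters is a very large cluster.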
evenVoterMessage := "Vault should have access to an odd number of voter nodes."
largeClusterMessage := "Very large cluster detected."
var quorumWarning string
if voterCount == 1 {
quorumWarning = "Only one server node found. Vault is not running in high availability mode."
} else if voterCount%2 == 0 && voterCount > 7 {
quorumWarning = evenVoterMessage + " " + largeClusterMessage
} else if voterCount%2 == 0 {
quorumWarning = evenVoterMessage
} else if voterCount > 7 {
quorumWarning = largeClusterMessage
}
raftStatus.QuorumWarning = quorumWarning
}
raftAutopilotState, err := h.wrappedCore.GetRaftAutopilotState(ctx)
if err != nil {
return nil, fmt.Errorf("unable to get Raft Autopilot state: %w", err)
}
if raftAutopilotState != nil {
autopilotStatus := &meta.AutopilotStatus{
Healthy: raftAutopilotState.Healthy,
}
autopilotServers := make([]*meta.AutopilotServer, 0)
for _, srv := range raftAutopilotState.Servers {
autopilotServers = append(autopilotServers, &meta.AutopilotServer{
ID: srv.ID,
Healthy: srv.Healthy,
})
}
		autopilotStatus.Servers = autopilotServers
		raftStatus.AutopilotStatus = autopilotStatus
}
resp := &meta.GetClusterStatusResponse{
ClusterID: h.wrappedCore.ClusterID(),
HAStatus: haStatus,
RaftStatus: raftStatus,
StorageType: h.wrappedCore.StorageType(),
}
return resp, nil
}
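
As a rough illustration (not part of this change), the new RPC could be exercised from an HCP link client along these lines. The `NewHCPLinkMetaClient` constructor, the `proto/meta` import path, and the direct gRPC dial are assumptions for the sketch; real HCP link traffic is carried over the SCADA provider session rather than a plain TCP connection.

```go
// Hypothetical caller of the meta service extended in this commit.
package main

import (
	"context"
	"fmt"
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	// Assumed import path for the generated meta service stubs.
	meta "github.com/hashicorp/vault/vault/hcp_link/proto/meta"
)

func main() {
	// Illustrative dial target; actual link traffic flows through SCADA.
	conn, err := grpc.Dial("127.0.0.1:28083",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Assumed protoc-gen-go-grpc constructor name for the HCPLinkMeta service.
	client := meta.NewHCPLinkMetaClient(conn)

	resp, err := client.GetClusterStatus(context.Background(), &meta.GetClusterStatusRequest{})
	if err != nil {
		log.Fatal(err)
	}

	// Field names follow the handler above.
	fmt.Println("cluster ID:    ", resp.ClusterID)
	fmt.Println("storage type:  ", resp.StorageType)
	fmt.Println("HA enabled:    ", resp.HAStatus.Enabled)
	fmt.Println("quorum warning:", resp.RaftStatus.QuorumWarning)
}
```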

View File

@@ -4,6 +4,7 @@ import (
"context"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault"
@@ -30,14 +31,21 @@ type WrappedCoreHCPToken interface {
var _ WrappedCoreHCPToken = &vault.Core{}
type WrappedCoreListNamespacesMounts interface {
type WrappedCoreMeta interface {
NamespaceByID(ctx context.Context, nsID string) (*namespace.Namespace, error)
ListNamespaces(includePath bool) []*namespace.Namespace
ListMounts() ([]*vault.MountEntry, error)
ListAuths() ([]*vault.MountEntry, error)
HAEnabled() bool
HAStateWithLock() consts.HAState
GetHAPeerNodesCached() []vault.PeerNode
GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error)
GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error)
StorageType() string
ClusterID() string
}
var _ WrappedCoreListNamespacesMounts = &vault.Core{}
var _ WrappedCoreMeta = &vault.Core{}
type WrappedCoreHCPLinkStatus interface {
WrappedCoreStandbyStates

View File

@@ -796,7 +796,7 @@ func (c *Core) handleCancelableRequest(ctx context.Context, req *logical.Request
}
if walState.LocalIndex != 0 || walState.ReplicatedIndex != 0 {
walState.ClusterID = c.clusterID.Load()
walState.ClusterID = c.ClusterID()
if walState.LocalIndex == 0 {
if c.perfStandby {
walState.LocalIndex = LastRemoteWAL(c)
@@ -2343,7 +2343,7 @@ func (c *Core) checkSSCTokenInternal(ctx context.Context, token string, isPerfSt
return plainToken.Random, nil
}
requiredWalState := &logical.WALState{ClusterID: c.clusterID.Load(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
requiredWalState := &logical.WALState{ClusterID: c.ClusterID(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
if c.HasWALState(requiredWalState, isPerfStandby) {
return plainToken.Random, nil
}