diff --git a/changelog/18351.txt b/changelog/18351.txt new file mode 100644 index 000000000..07faa06d1 --- /dev/null +++ b/changelog/18351.txt @@ -0,0 +1,3 @@ +```release-note:improvement +hcp/status: Add cluster-level status information +``` diff --git a/go.mod b/go.mod index ba7446dae..e7df61cc8 100644 --- a/go.mod +++ b/go.mod @@ -138,7 +138,7 @@ require ( github.com/hashicorp/vault/api/auth/approle v0.1.0 github.com/hashicorp/vault/api/auth/userpass v0.1.0 github.com/hashicorp/vault/sdk v0.6.1 - github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d + github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab github.com/jackc/pgx/v4 v4.15.0 github.com/jcmturner/gokrb5/v8 v8.4.2 diff --git a/go.sum b/go.sum index 353496b0d..c112eea1f 100644 --- a/go.sum +++ b/go.sum @@ -1173,6 +1173,16 @@ github.com/hashicorp/vault-testing-stepwise v0.1.2 h1:3obC/ziAPGnsz2IQxr5e4Ayb7t github.com/hashicorp/vault-testing-stepwise v0.1.2/go.mod h1:TeU6B+5NqxUjto+Zey+QQEH1iywuHn0ciHZNYh4q3uI= github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d h1:U692VbDl6ww5GQsNFClJVFJDaPeuqtDt1Mwqf21KYek= github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419 h1:yl6f//YTaTTGKJwyOpRe7v1DDPrzP+NErwgnef6qx7A= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94 h1:Rx4Q2/mOPqJuanzwZYttDkWjdibPv3UpvsvKmOkl6h4= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto 
v0.0.0-20230105183308-048241517ffb h1:PgXcBszV61BvxD0wZzm4QCz9btgTWX74NO4be6S2afU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051 h1:cMQoRbIUMhbM0NsmP6hH3S3ZmAPVgic3g3L8Z55rXCI=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 h1:9cMwZnaAV/lKs8EZsvBF00wPt350wD3sg/xqWGeN4gM=
+github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
 github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw=
 github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35nTu0ey1EXjwNwPjI9xErAsoOCmcMb9GKvyxo=
 github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
diff --git a/vault/cluster.go b/vault/cluster.go
index bc1d352e5..28060721b 100644
--- a/vault/cluster.go
+++ b/vault/cluster.go
@@ -388,3 +388,8 @@ func (c *Core) SetClusterListenerAddrs(addrs []*net.TCPAddr) {
 func (c *Core) SetClusterHandler(handler http.Handler) {
 	c.clusterHandler = handler
 }
+
+// ClusterID returns the value currently stored in the core's clusterID field.
+func (c *Core) ClusterID() string {
+	return c.clusterID.Load()
+}
diff --git a/vault/core.go b/vault/core.go
index a37e23d6f..bcfec5baa 100644
--- a/vault/core.go
+++ b/vault/core.go
@@ -688,6 +688,17 @@ func (c *Core) HAState() consts.HAState {
 	}
 }
 
+// HAStateWithLock returns the same value as HAState, but holds the read side
+// of stateLock for the duration of the HAState call.
+func (c *Core) HAStateWithLock() consts.HAState {
+	c.stateLock.RLock()
+	// Deferred so the lock is still held while HAState runs; unlocking before
+	// the call would leave the read unprotected.
+	defer c.stateLock.RUnlock()
+
+	return c.HAState()
+}
+
 // CoreConfig is used to parameterize a core
 type CoreConfig struct {
 	entCoreConfig
@@ -3699,3 +3710,30 @@ func (c *Core) GetHCPLinkStatus() (string, string) {
 
 	return status, resourceID
 }
+
+// HAEnabled reports whether the core has an HA backend that reports HA as enabled.
+func (c *Core) HAEnabled() bool {
+	return c.ha != nil && c.ha.HAEnabled()
+}
+
+// GetRaftConfiguration returns the configuration reported by the raft
+// backend. It returns (nil, nil) when the core has no raft backend.
+func (c *Core) GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) {
+	raftBackend := c.getRaftBackend()
+	if raftBackend == nil {
+		return nil, nil
+	}
+
+	return raftBackend.GetConfiguration(ctx)
+}
+
+// GetRaftAutopilotState returns the autopilot server state reported by the
+// raft backend. It returns (nil, nil) when the core has no raft backend.
+func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) {
+	raftBackend := c.getRaftBackend()
+	if raftBackend == nil {
+		return nil, nil
+	}
+
+	return raftBackend.GetAutopilotServerState(ctx)
+}
diff --git a/vault/hcp_link/capabilities/meta/meta.go b/vault/hcp_link/capabilities/meta/meta.go
index a0b5f2db2..c96f7cb60 100644
--- a/vault/hcp_link/capabilities/meta/meta.go
+++ b/vault/hcp_link/capabilities/meta/meta.go
@@ -4,12 +4,14 @@ import (
 	"context"
 	"fmt"
 	"math"
+	"os"
 	"sync"
 	"time"
 
 	"github.com/hashicorp/go-hclog"
 	scada "github.com/hashicorp/hcp-scada-provider"
 	"github.com/hashicorp/vault/helper/namespace"
+	"github.com/hashicorp/vault/sdk/helper/consts"
 	"github.com/hashicorp/vault/vault"
 	"github.com/hashicorp/vault/vault/cluster"
 	"github.com/hashicorp/vault/vault/hcp_link/capabilities"
@@ -23,7 +25,7 @@ import (
 type hcpLinkMetaHandler struct {
 	meta.UnimplementedHCPLinkMetaServer
 
-	wrappedCore   internal.WrappedCoreListNamespacesMounts
+	wrappedCore   internal.WrappedCoreMeta
 	scadaProvider scada.SCADAProvider
 	logger        hclog.Logger
 
@@ -129,7 +131,7 @@ func (h *hcpLinkMetaHandler) ListNamespaces(ctx context.Context, req *meta.ListN
 func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMountsRequest) (*meta.ListMountsResponse, error) {
 	mountEntries, err := h.wrappedCore.ListMounts()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("unable to list secret mounts: %w", err)
 	}
 
 	var mounts []*meta.Mount
@@ -140,7 +142,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
 		if nsID != namespace.RootNamespaceID {
 			ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
 			if err != 
nil {
-				return nil, err
+				return nil, fmt.Errorf("unable to get namespace associated with secret mount: %w", err)
 			}
 
 			path = ns.Path + path
@@ -161,7 +163,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
 func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsRequest) (*meta.ListAuthResponse, error) {
 	authEntries, err := h.wrappedCore.ListAuths()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("unable to list auth mounts: %w", err)
 	}
 
 	var auths []*meta.Auth
@@ -172,7 +174,7 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
 		if nsID != namespace.RootNamespaceID {
 			ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
 			if err != nil {
-				return nil, err
+				return nil, fmt.Errorf("unable to get namespace associated with auth mount: %w", err)
 			}
 
 			path = ns.Path + path
@@ -189,3 +191,126 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
 		Auths: auths,
 	}, nil
 }
+
+// GetClusterStatus reports cluster-level status from the active node: the
+// cluster ID, the storage type, HA status and, when the core returns them,
+// the Raft configuration and Raft Autopilot state.
+//
+// It returns an error if this node is not active, if the hostname cannot be
+// determined, or if fetching the Raft configuration or Autopilot state fails.
+func (h *hcpLinkMetaHandler) GetClusterStatus(ctx context.Context, req *meta.GetClusterStatusRequest) (*meta.GetClusterStatusResponse, error) {
+	if h.wrappedCore.HAStateWithLock() != consts.Active {
+		return nil, fmt.Errorf("node not active")
+	}
+
+	hostname, err := os.Hostname()
+	if err != nil {
+		return nil, fmt.Errorf("unable to fetch hostname: %w", err)
+	}
+
+	haEnabled := h.wrappedCore.HAEnabled()
+	haStatus := &meta.HAStatus{
+		Enabled: haEnabled,
+	}
+
+	if haEnabled {
+		// This node passed the active check above, so its hostname is listed
+		// first, ahead of the cached peer nodes.
+		leader := &meta.HANode{
+			Hostname: hostname,
+		}
+
+		peers := h.wrappedCore.GetHAPeerNodesCached()
+
+		haNodes := make([]*meta.HANode, len(peers)+1)
+		haNodes[0] = leader
+
+		for i, peerNode := range peers {
+			haNodes[i+1] = &meta.HANode{
+				Hostname: peerNode.Hostname,
+			}
+		}
+
+		haStatus.Nodes = haNodes
+	}
+
+	raftStatus := &meta.RaftStatus{}
+	raftConfig, err := h.wrappedCore.GetRaftConfiguration(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get Raft configuration: %w", err)
+	}
+
+	// A nil configuration leaves the Raft configuration and quorum warning unset.
+	if raftConfig != nil {
+		raftServers := make([]*meta.RaftServer, len(raftConfig.Servers))
+
+		var voterCount uint32
+		for i, srv := range raftConfig.Servers {
+			raftServers[i] = &meta.RaftServer{
+				NodeID:          srv.NodeID,
+				Address:         srv.Address,
+				Voter:           srv.Voter,
+				Leader:          srv.Leader,
+				ProtocolVersion: srv.ProtocolVersion,
+			}
+
+			if srv.Voter {
+				voterCount++
+			}
+		}
+
+		raftStatus.RaftConfiguration = &meta.RaftConfiguration{
+			Servers: raftServers,
+		}
+
+		evenVoterMessage := "Vault should have access to an odd number of voter nodes."
+		largeClusterMessage := "Very large cluster detected."
+		var quorumWarning string
+
+		switch {
+		case voterCount == 1:
+			quorumWarning = "Only one server node found. Vault is not running in high availability mode."
+		case voterCount%2 == 0 && voterCount > 7:
+			quorumWarning = evenVoterMessage + " " + largeClusterMessage
+		case voterCount%2 == 0:
+			quorumWarning = evenVoterMessage
+		case voterCount > 7:
+			quorumWarning = largeClusterMessage
+		}
+
+		raftStatus.QuorumWarning = quorumWarning
+	}
+
+	raftAutopilotState, err := h.wrappedCore.GetRaftAutopilotState(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get Raft Autopilot state: %w", err)
+	}
+
+	if raftAutopilotState != nil {
+		autopilotStatus := &meta.AutopilotStatus{
+			Healthy: raftAutopilotState.Healthy,
+		}
+
+		autopilotServers := make([]*meta.AutopilotServer, 0, len(raftAutopilotState.Servers))
+		for _, srv := range raftAutopilotState.Servers {
+			autopilotServers = append(autopilotServers, &meta.AutopilotServer{
+				ID:      srv.ID,
+				Healthy: srv.Healthy,
+			})
+		}
+
+		// Attach the per-server entries; otherwise the list built above is
+		// discarded and only the overall Healthy flag is reported.
+		autopilotStatus.Servers = autopilotServers
+		raftStatus.AutopilotStatus = autopilotStatus
+	}
+
+	resp := &meta.GetClusterStatusResponse{
+		ClusterID:   h.wrappedCore.ClusterID(),
+		HAStatus:    haStatus,
+		RaftStatus:  raftStatus,
+		StorageType: h.wrappedCore.StorageType(),
+	}
+
+	return resp, nil
+}
diff --git a/vault/hcp_link/internal/wrapped_hcpLink.go b/vault/hcp_link/internal/wrapped_hcpLink.go
index 0bc9717bc..98b0bf4e0 100644
--- 
a/vault/hcp_link/internal/wrapped_hcpLink.go
+++ b/vault/hcp_link/internal/wrapped_hcpLink.go
@@ -4,6 +4,7 @@ import (
 	"context"
 
 	"github.com/hashicorp/vault/helper/namespace"
+	"github.com/hashicorp/vault/physical/raft"
 	"github.com/hashicorp/vault/sdk/helper/consts"
 	"github.com/hashicorp/vault/sdk/logical"
 	"github.com/hashicorp/vault/vault"
@@ -30,14 +31,24 @@ type WrappedCoreHCPToken interface {
 
 var _ WrappedCoreHCPToken = &vault.Core{}
 
-type WrappedCoreListNamespacesMounts interface {
+// WrappedCoreMeta is the set of core methods used by the HCP link meta
+// handler: namespace and mount listing plus the HA, Raft, storage-type and
+// cluster-ID accessors it reads when building cluster status.
+type WrappedCoreMeta interface {
 	NamespaceByID(ctx context.Context, nsID string) (*namespace.Namespace, error)
 	ListNamespaces(includePath bool) []*namespace.Namespace
 	ListMounts() ([]*vault.MountEntry, error)
 	ListAuths() ([]*vault.MountEntry, error)
+	HAEnabled() bool
+	HAStateWithLock() consts.HAState
+	GetHAPeerNodesCached() []vault.PeerNode
+	GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error)
+	GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error)
+	StorageType() string
+	ClusterID() string
 }
 
-var _ WrappedCoreListNamespacesMounts = &vault.Core{}
+var _ WrappedCoreMeta = &vault.Core{}
 
 type WrappedCoreHCPLinkStatus interface {
 	WrappedCoreStandbyStates
diff --git a/vault/request_handling.go b/vault/request_handling.go
index 6703d3a53..3183a0273 100644
--- a/vault/request_handling.go
+++ b/vault/request_handling.go
@@ -796,7 +796,7 @@ func (c *Core) handleCancelableRequest(ctx context.Context, req *logical.Request
 	}
 
 	if walState.LocalIndex != 0 || walState.ReplicatedIndex != 0 {
-		walState.ClusterID = c.clusterID.Load()
+		walState.ClusterID = c.ClusterID()
 		if walState.LocalIndex == 0 {
 			if c.perfStandby {
 				walState.LocalIndex = LastRemoteWAL(c)
@@ -2343,7 +2343,7 @@ func (c *Core) checkSSCTokenInternal(ctx context.Context, token string, isPerfSt
 		return plainToken.Random, nil
 	}
 
-	requiredWalState := &logical.WALState{ClusterID: c.clusterID.Load(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
+	requiredWalState := &logical.WALState{ClusterID: c.ClusterID(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
 	if c.HasWALState(requiredWalState, isPerfStandby) {
 		return plainToken.Random, nil
 	}