2023-03-28 18:39:22 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2017-03-01 22:04:40 +00:00
|
|
|
package consul
|
|
|
|
|
|
|
|
import (
|
2017-03-20 03:48:42 +00:00
|
|
|
"context"
|
2017-03-01 22:04:40 +00:00
|
|
|
"fmt"
|
|
|
|
|
2017-12-13 01:45:03 +00:00
|
|
|
"github.com/armon/go-metrics"
|
2020-11-13 02:12:12 +00:00
|
|
|
"github.com/armon/go-metrics/prometheus"
|
2017-03-01 22:04:40 +00:00
|
|
|
"github.com/hashicorp/raft"
|
2020-09-25 17:46:38 +00:00
|
|
|
autopilot "github.com/hashicorp/raft-autopilot"
|
2017-03-01 22:04:40 +00:00
|
|
|
"github.com/hashicorp/serf/serf"
|
2021-08-19 21:17:59 +00:00
|
|
|
|
2022-04-19 17:03:03 +00:00
|
|
|
"github.com/hashicorp/consul/agent/consul/autopilotevents"
|
2021-08-19 21:17:59 +00:00
|
|
|
"github.com/hashicorp/consul/agent/metadata"
|
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
2022-04-07 14:48:48 +00:00
|
|
|
"github.com/hashicorp/consul/logging"
|
2021-08-19 21:17:59 +00:00
|
|
|
"github.com/hashicorp/consul/types"
|
2017-03-01 22:04:40 +00:00
|
|
|
)
|
|
|
|
|
2020-11-13 02:12:12 +00:00
|
|
|
var AutopilotGauges = []prometheus.GaugeDefinition{
|
|
|
|
{
|
2020-11-13 21:18:04 +00:00
|
|
|
Name: []string{"autopilot", "failure_tolerance"},
|
2020-11-16 19:02:11 +00:00
|
|
|
Help: "Tracks the number of voting servers that the cluster can lose while continuing to function.",
|
2020-11-13 02:12:12 +00:00
|
|
|
},
|
|
|
|
{
|
2020-11-13 21:18:04 +00:00
|
|
|
Name: []string{"autopilot", "healthy"},
|
2020-11-16 19:02:11 +00:00
|
|
|
Help: "Tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
|
2020-11-13 02:12:12 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2017-12-12 00:38:52 +00:00
|
|
|
// AutopilotDelegate is a Consul delegate for autopilot operations.
|
|
|
|
type AutopilotDelegate struct {
|
2022-04-19 17:03:03 +00:00
|
|
|
server *Server
|
|
|
|
readyServersPublisher *autopilotevents.ReadyServersEventPublisher
|
2017-03-08 19:31:32 +00:00
|
|
|
}
|
|
|
|
|
2017-12-13 01:45:03 +00:00
|
|
|
func (d *AutopilotDelegate) AutopilotConfig() *autopilot.Config {
|
2022-04-07 14:48:48 +00:00
|
|
|
return d.server.getAutopilotConfigOrDefault().ToAutopilotLibraryConfig()
|
2017-03-16 01:27:17 +00:00
|
|
|
}
|
2017-03-10 00:43:07 +00:00
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (d *AutopilotDelegate) KnownServers() map[raft.ServerID]*autopilot.Server {
|
|
|
|
return d.server.autopilotServers()
|
2017-03-10 00:43:07 +00:00
|
|
|
}
|
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (d *AutopilotDelegate) FetchServerStats(ctx context.Context, servers map[raft.ServerID]*autopilot.Server) map[raft.ServerID]*autopilot.ServerStats {
|
|
|
|
return d.server.statsFetcher.Fetch(ctx, servers)
|
2017-03-01 22:04:40 +00:00
|
|
|
}
|
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (d *AutopilotDelegate) NotifyState(state *autopilot.State) {
|
2022-04-07 14:48:48 +00:00
|
|
|
metrics.SetGauge([]string{"autopilot", "failure_tolerance"}, float32(state.FailureTolerance))
|
|
|
|
if state.Healthy {
|
|
|
|
metrics.SetGauge([]string{"autopilot", "healthy"}, 1)
|
2021-10-08 17:31:50 +00:00
|
|
|
} else {
|
2022-04-07 14:48:48 +00:00
|
|
|
metrics.SetGauge([]string{"autopilot", "healthy"}, 0)
|
2017-12-13 01:45:03 +00:00
|
|
|
}
|
2022-04-19 17:03:03 +00:00
|
|
|
|
|
|
|
d.readyServersPublisher.PublishReadyServersEvents(state)
|
2022-09-09 14:02:01 +00:00
|
|
|
|
|
|
|
var readyServers uint32
|
|
|
|
for _, server := range state.Servers {
|
|
|
|
if autopilotevents.IsServerReady(server) {
|
|
|
|
readyServers++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
d.server.xdsCapacityController.SetServerCount(readyServers)
|
2017-03-15 23:09:55 +00:00
|
|
|
}
|
|
|
|
|
2020-11-05 16:18:59 +00:00
|
|
|
func (d *AutopilotDelegate) RemoveFailedServer(srv *autopilot.Server) {
|
2021-10-26 20:08:55 +00:00
|
|
|
serverEntMeta := structs.DefaultEnterpriseMetaInDefaultPartition()
|
2020-11-05 16:18:59 +00:00
|
|
|
go func() {
|
2021-10-26 20:08:55 +00:00
|
|
|
if err := d.server.RemoveFailedNode(srv.Name, false, serverEntMeta); err != nil {
|
2021-09-20 12:40:58 +00:00
|
|
|
d.server.logger.Error("failed to remove server", "name", srv.Name, "id", srv.ID, "error", err)
|
2020-11-05 16:18:59 +00:00
|
|
|
}
|
|
|
|
}()
|
2017-03-21 23:36:44 +00:00
|
|
|
}
|
2017-12-13 01:45:03 +00:00
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (s *Server) initAutopilot(config *Config) {
|
2022-04-19 17:03:03 +00:00
|
|
|
apDelegate := &AutopilotDelegate{
|
|
|
|
server: s,
|
|
|
|
readyServersPublisher: autopilotevents.NewReadyServersEventPublisher(autopilotevents.Config{
|
|
|
|
Publisher: s.publisher,
|
|
|
|
GetStore: func() autopilotevents.StateStore { return s.fsm.State() },
|
|
|
|
}),
|
|
|
|
}
|
2020-09-25 17:46:38 +00:00
|
|
|
|
|
|
|
s.autopilot = autopilot.New(
|
|
|
|
s.raft,
|
|
|
|
apDelegate,
|
|
|
|
autopilot.WithLogger(s.logger),
|
|
|
|
autopilot.WithReconcileInterval(config.AutopilotInterval),
|
|
|
|
autopilot.WithUpdateInterval(config.ServerHealthInterval),
|
|
|
|
autopilot.WithPromoter(s.autopilotPromoter()),
|
2022-04-07 14:48:48 +00:00
|
|
|
autopilot.WithReconciliationDisabled(),
|
2020-09-25 17:46:38 +00:00
|
|
|
)
|
2022-04-19 17:03:03 +00:00
|
|
|
|
|
|
|
// registers a snapshot handler for the event publisher to send as the first event for a new stream
|
proxycfg: server-local config entry data sources
This is the OSS portion of enterprise PR 2056.
This commit provides server-local implementations of the proxycfg.ConfigEntry
and proxycfg.ConfigEntryList interfaces, that source data from streaming events.
It makes use of the LocalMaterializer type introduced for peering replication,
adding the necessary support for authorization.
It also adds support for "wildcard" subscriptions (within a topic) to the event
publisher, as this is needed to fetch service-resolvers for all services when
configuring mesh gateways.
Currently, events will be emitted for just the ingress-gateway, service-resolver,
and mesh config entry types, as these are the only entries required by proxycfg
— the events will be emitted on topics named IngressGateway, ServiceResolver,
and MeshConfig topics respectively.
Though these events will only be consumed "locally" for now, they can also be
consumed via the gRPC endpoint (confirmed using grpcurl) so using them from
client agents should be a case of swapping the LocalMaterializer for an
RPCMaterializer.
2022-07-01 15:09:47 +00:00
|
|
|
s.publisher.RegisterHandler(autopilotevents.EventTopicReadyServers, apDelegate.readyServersPublisher.HandleSnapshot, false)
|
2017-12-13 01:45:03 +00:00
|
|
|
}
|
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server {
|
|
|
|
servers := make(map[raft.ServerID]*autopilot.Server)
|
|
|
|
for _, member := range s.serfLAN.Members() {
|
|
|
|
srv, err := s.autopilotServer(member)
|
|
|
|
if err != nil {
|
|
|
|
s.logger.Warn("Error parsing server info", "name", member.Name, "error", err)
|
|
|
|
continue
|
|
|
|
} else if srv == nil {
|
|
|
|
// this member was a client
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
servers[srv.ID] = srv
|
|
|
|
}
|
|
|
|
|
|
|
|
return servers
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Server) autopilotServer(m serf.Member) (*autopilot.Server, error) {
|
|
|
|
ok, srv := metadata.IsConsulServer(m)
|
|
|
|
if !ok {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return s.autopilotServerFromMetadata(srv)
|
2017-12-13 01:45:03 +00:00
|
|
|
}
|
2019-06-28 17:40:07 +00:00
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
func (s *Server) autopilotServerFromMetadata(srv *metadata.Server) (*autopilot.Server, error) {
|
|
|
|
server := &autopilot.Server{
|
|
|
|
Name: srv.ShortName,
|
|
|
|
ID: raft.ServerID(srv.ID),
|
|
|
|
Address: raft.ServerAddress(srv.Addr.String()),
|
|
|
|
Version: srv.Build.String(),
|
|
|
|
RaftVersion: srv.RaftVersion,
|
|
|
|
Ext: s.autopilotServerExt(srv),
|
|
|
|
}
|
|
|
|
|
|
|
|
switch srv.Status {
|
|
|
|
case serf.StatusLeft:
|
|
|
|
server.NodeStatus = autopilot.NodeLeft
|
|
|
|
case serf.StatusAlive, serf.StatusLeaving:
|
|
|
|
// we want to treat leaving as alive to prevent autopilot from
|
|
|
|
// prematurely removing the node.
|
|
|
|
server.NodeStatus = autopilot.NodeAlive
|
|
|
|
case serf.StatusFailed:
|
|
|
|
server.NodeStatus = autopilot.NodeFailed
|
|
|
|
default:
|
|
|
|
server.NodeStatus = autopilot.NodeUnknown
|
|
|
|
}
|
|
|
|
|
|
|
|
// populate the node meta if there is any. When a node first joins or if
|
|
|
|
// there are ACL issues then this could be empty if the server has not
|
|
|
|
// yet been able to register itself in the catalog
|
peering: initial sync (#12842)
- Add endpoints related to peering: read, list, generate token, initiate peering
- Update node/service/check table indexing to account for peers
- Foundational changes for pushing service updates to a peer
- Plumb peer name through Health.ServiceNodes path
see: ENT-1765, ENT-1280, ENT-1283, ENT-1283, ENT-1756, ENT-1739, ENT-1750, ENT-1679,
ENT-1709, ENT-1704, ENT-1690, ENT-1689, ENT-1702, ENT-1701, ENT-1683, ENT-1663,
ENT-1650, ENT-1678, ENT-1628, ENT-1658, ENT-1640, ENT-1637, ENT-1597, ENT-1634,
ENT-1613, ENT-1616, ENT-1617, ENT-1591, ENT-1588, ENT-1596, ENT-1572, ENT-1555
Co-authored-by: R.B. Boyer <rb@hashicorp.com>
Co-authored-by: freddygv <freddy@hashicorp.com>
Co-authored-by: Chris S. Kim <ckim@hashicorp.com>
Co-authored-by: Evan Culver <eculver@hashicorp.com>
Co-authored-by: Nitya Dhanushkodi <nitya@hashicorp.com>
2022-04-21 22:34:40 +00:00
|
|
|
_, node, err := s.fsm.State().GetNodeID(types.NodeID(srv.ID), structs.NodeEnterpriseMetaInDefaultPartition(), structs.DefaultPeerKeyword)
|
2020-09-25 17:46:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error retrieving node from state store: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if node != nil {
|
|
|
|
server.Meta = node.Meta
|
|
|
|
}
|
|
|
|
|
|
|
|
return server, nil
|
2019-06-28 17:40:07 +00:00
|
|
|
}
|
2022-04-07 14:48:48 +00:00
|
|
|
|
|
|
|
func (s *Server) getAutopilotConfigOrDefault() *structs.AutopilotConfig {
|
|
|
|
logger := s.loggers.Named(logging.Autopilot)
|
|
|
|
state := s.fsm.State()
|
|
|
|
_, config, err := state.AutopilotConfig()
|
|
|
|
if err != nil {
|
|
|
|
logger.Error("failed to get config", "error", err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if config != nil {
|
|
|
|
return config
|
|
|
|
}
|
|
|
|
|
|
|
|
// autopilot may start running prior to there ever being a leader
|
|
|
|
// and having an autopilot configuration created. In that case
|
|
|
|
// use the one from the local configuration for now.
|
|
|
|
return s.config.AutopilotConfig
|
|
|
|
}
|