open-consul/agent/agent.go

package agent

import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/armon/go-metrics"
	"github.com/armon/go-metrics/prometheus"
	"github.com/hashicorp/go-connlimit"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
	"golang.org/x/net/http2"
	"google.golang.org/grpc"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/ae"
	"github.com/hashicorp/consul/agent/cache"
	cachetype "github.com/hashicorp/consul/agent/cache-types"
	"github.com/hashicorp/consul/agent/checks"
	"github.com/hashicorp/consul/agent/config"
	"github.com/hashicorp/consul/agent/consul"
	"github.com/hashicorp/consul/agent/dns"
	"github.com/hashicorp/consul/agent/local"
	"github.com/hashicorp/consul/agent/proxycfg"
	"github.com/hashicorp/consul/agent/rpcclient/health"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/agent/systemd"
	"github.com/hashicorp/consul/agent/token"
	"github.com/hashicorp/consul/agent/xds"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/api/watch"
	"github.com/hashicorp/consul/ipaddr"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/consul/lib/file"
	"github.com/hashicorp/consul/lib/mutex"
	"github.com/hashicorp/consul/lib/routine"
	"github.com/hashicorp/consul/logging"
	"github.com/hashicorp/consul/tlsutil"
	"github.com/hashicorp/consul/types"
)

const (
	// Path to save agent service definitions
	servicesDir      = "services"
	serviceConfigDir = "services/configs"

	// Path to save agent proxy definitions
	proxyDir = "proxies"

	// Path to save local agent checks
	checksDir     = "checks"
	checkStateDir = "checks/state"

	// Default reasons for node/service maintenance mode
	defaultNodeMaintReason = "Maintenance mode is enabled for this node, " +
		"but no reason was provided. This is a default message."
	defaultServiceMaintReason = "Maintenance mode is enabled for this " +
		"service, but no reason was provided. This is a default message."

	// ID of the roots watch
	rootsWatchID = "roots"

	// ID of the leaf watch
	leafWatchID = "leaf"

	// maxQueryTime is used to bound the limit of a blocking query
	maxQueryTime = 600 * time.Second

	// defaultQueryTime is the amount of time we block waiting for a change
	// if no time is specified. Previously we would wait the maxQueryTime.
	defaultQueryTime = 300 * time.Second
)

var (
	httpAddrRE = regexp.MustCompile(`^(http[s]?://)(\[.*?\]|\[?[\w\-\.]+)(:\d+)?([^?]*)(\?.*)?$`)
	grpcAddrRE = regexp.MustCompile("(.*)((?::)(?:[0-9]+))(.*)$")
)

type configSource int

const (
	ConfigSourceLocal configSource = iota
	ConfigSourceRemote
)

var configSourceToName = map[configSource]string{
	ConfigSourceLocal:  "local",
	ConfigSourceRemote: "remote",
}
var configSourceFromName = map[string]configSource{
	"local":  ConfigSourceLocal,
	"remote": ConfigSourceRemote,
	// If the value is not found in the persisted config file, then use the
	// former default.
	"": ConfigSourceLocal,
}

func (s configSource) String() string {
	return configSourceToName[s]
}

// ConfigSourceFromName will unmarshal the string form of a configSource.
func ConfigSourceFromName(name string) (configSource, bool) {
	s, ok := configSourceFromName[name]
	return s, ok
}

// delegate defines the interface shared by both
// consul.Client and consul.Server.
type delegate interface {
	// Leave is used to prepare for a graceful shutdown.
	Leave() error

	// AgentLocalMember is used to retrieve the LAN member for the local node.
	AgentLocalMember() serf.Member

	// LANMembersInAgentPartition returns the LAN members for this agent's
	// canonical serf pool. For clients this is the only pool that exists. For
	// servers it's the pool in the default segment and the default partition.
	LANMembersInAgentPartition() []serf.Member

	// LANMembers returns the LAN members for one of:
	//
	// - the requested partition
	// - the requested segment
	// - all segments
	//
	// This is limited to segments and partitions that the node is a member of.
	LANMembers(f consul.LANMemberFilter) ([]serf.Member, error)

	// GetLANCoordinate returns the coordinate of the node in the LAN gossip
	// pool.
	//
	// - Clients return a single coordinate for the single gossip pool they are
	//   in (default, segment, or partition).
	//
	// - Servers return one coordinate for their canonical gossip pool (i.e.
	//   default partition/segment) and one per segment they are also ancillary
	//   members of.
	//
	// NOTE: servers do not emit coordinates for partitioned gossip pools they
	// are ancillary members of.
	//
	// NOTE: This assumes coordinates are enabled, so check that before calling.
	GetLANCoordinate() (lib.CoordinateSet, error)

	// JoinLAN is used to have Consul join the inner-DC pool The target address
	// should be another node inside the DC listening on the Serf LAN address
	JoinLAN(addrs []string, entMeta *structs.EnterpriseMeta) (n int, err error)

	// RemoveFailedNode is used to remove a failed node from the cluster.
	RemoveFailedNode(node string, prune bool, entMeta *structs.EnterpriseMeta) error

	// ResolveTokenAndDefaultMeta returns an acl.Authorizer which authorizes
	// actions based on the permissions granted to the token.
	// If either entMeta or authzContext are non-nil they will be populated with the
	// default partition and namespace from the token.
	ResolveTokenAndDefaultMeta(token string, entMeta *structs.EnterpriseMeta, authzContext *acl.AuthorizerContext) (consul.ACLResolveResult, error)

	RPC(method string, args interface{}, reply interface{}) error
	SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer, replyFn structs.SnapshotReplyFn) error
	Shutdown() error
	Stats() map[string]map[string]string
	ReloadConfig(config consul.ReloadableConfig) error
	enterpriseDelegate
}

// notifier is called after a successful JoinLAN.
type notifier interface {
	Notify(string) error
}

// Agent is the long running process that is run on every machine.
// It exposes an RPC interface that is used by the CLI to control the
// agent. The agent runs the query interfaces like HTTP, DNS, and RPC.
// However, it can run in either a client, or server mode. In server
// mode, it runs a full Consul server. In client-only mode, it only forwards
// requests to other Consul servers.
type Agent struct {
	// TODO: remove fields that are already in BaseDeps
	baseDeps BaseDeps

	// config is the agent configuration.
	config *config.RuntimeConfig

	// Used for writing our logs
	logger hclog.InterceptLogger

	// delegate is either a *consul.Server or *consul.Client
	// depending on the configuration
	delegate delegate

	// state stores a local representation of the node,
	// services and checks. Used for anti-entropy.
	State *local.State

	// sync manages the synchronization of the local
	// and the remote state.
	sync *ae.StateSyncer

	// syncMu and syncCh are used to coordinate agent endpoints that are blocking
	// on local state during a config reload.
	syncMu sync.Mutex
	syncCh chan struct{}

	// cache is the in-memory cache for data the Agent requests.
	cache *cache.Cache

	// checkReapAfter maps the check ID to a timeout after which we should
	// reap its associated service
	checkReapAfter map[structs.CheckID]time.Duration

	// checkMonitors maps the check ID to an associated monitor
	checkMonitors map[structs.CheckID]*checks.CheckMonitor

	// checkHTTPs maps the check ID to an associated HTTP check
	checkHTTPs map[structs.CheckID]*checks.CheckHTTP

	// checkH2PINGs maps the check ID to an associated HTTP2 PING check
	checkH2PINGs map[structs.CheckID]*checks.CheckH2PING

	// checkTCPs maps the check ID to an associated TCP check
	checkTCPs map[structs.CheckID]*checks.CheckTCP

	// checkGRPCs maps the check ID to an associated GRPC check
	checkGRPCs map[structs.CheckID]*checks.CheckGRPC

	// checkTTLs maps the check ID to an associated check TTL
	checkTTLs map[structs.CheckID]*checks.CheckTTL

	// checkDockers maps the check ID to an associated Docker Exec based check
	checkDockers map[structs.CheckID]*checks.CheckDocker

	// checkAliases maps the check ID to an associated Alias checks
	checkAliases map[structs.CheckID]*checks.CheckAlias

	// exposedPorts tracks listener ports for checks exposed through a proxy
	exposedPorts map[string]int

	// stateLock protects the agent state
	stateLock *mutex.Mutex

	// dockerClient is the client for performing docker health checks.
	dockerClient *checks.DockerClient

	// eventCh is used to receive user events
	eventCh chan serf.UserEvent

	// eventBuf stores the most recent events in a ring buffer
	// using eventIndex as the next index to insert into. This
	// is guarded by eventLock. When an insert happens, the
	// eventNotify group is notified.
	eventBuf    []*UserEvent
	eventIndex  int
	eventLock   sync.RWMutex
	eventNotify NotifyGroup

	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// joinLANNotifier is called after a successful JoinLAN.
	joinLANNotifier notifier

	// retryJoinCh transports errors from the retry join
	// attempts.
	retryJoinCh chan error

	// endpoints maps unique RPC endpoint names to common ones
	// to allow overriding of RPC handlers since the golang
	// net/rpc server does not allow this.
	endpoints     map[string]string
	endpointsLock sync.RWMutex

	// dnsServer provides the DNS API
	dnsServers []*DNSServer

	// apiServers listening for connections. If any of these server goroutines
	// fail, the agent will be shutdown.
	apiServers *apiServers

	// httpHandlers provides direct access to (one of) the HTTPHandlers started by
	// this agent. This is used in tests to test HTTP endpoints without overhead
	// of TCP connections etc.
	//
	// TODO: this is a temporary re-introduction after we removed a list of
	// HTTPServers in favour of apiServers abstraction. Now that HTTPHandlers is
	// stateful and has config reloading though it's not OK to just use a
	// different instance of handlers in tests to the ones that the agent is wired
	// up to since then config reloads won't actually affect the handlers under
	// test while plumbing the external handlers in the TestAgent through bypasses
	// testing that the agent itself is actually reloading the state correctly.
	// Once we move `apiServers` to be a passed-in dependency for NewAgent, we
	// should be able to remove this and have the Test Agent create the
	// HTTPHandlers and pass them in removing the need to pull them back out
	// again.
	httpHandlers *HTTPHandlers

	// wgServers is the wait group for all HTTP and DNS servers
	// TODO: remove once dnsServers are handled by apiServers
	wgServers sync.WaitGroup

	// watchPlans tracks all the currently-running watch plans for the
	// agent.
	watchPlans []*watch.Plan

	// tokens holds ACL tokens initially from the configuration, but can
	// be updated at runtime, so should always be used instead of going to
	// the configuration directly.
	tokens *token.Store

	// proxyConfig is the manager for proxy service (Kind = connect-proxy)
	// configuration state. This ensures all state needed by a proxy registration
	// is maintained in cache and handles pushing updates to that state into XDS
	// server to be pushed out to Envoy.
	proxyConfig *proxycfg.Manager

	// serviceManager is the manager for combining local service registrations with
	// the centrally configured proxy/service defaults.
	serviceManager *ServiceManager

	// grpcServer is the server instance used currently to serve xDS API for
	// Envoy.
	grpcServer *grpc.Server

	// tlsConfigurator is the central instance to provide a *tls.Config
	// based on the current consul configuration.
	tlsConfigurator *tlsutil.Configurator

	// httpConnLimiter is used to limit connections to the HTTP server by client
	// IP.
	httpConnLimiter connlimit.Limiter

	// configReloaders are subcomponents that need to be notified on a reload so
	// they can update their internal state.
	configReloaders []ConfigReloader

	// TODO: pass directly to HTTPHandlers and DNSServer once those are passed
	// into Agent, which will allow us to remove this field.
	rpcClientHealth *health.Client

	// routineManager is responsible for managing longer running go routines
	// run by the Agent
	routineManager *routine.Manager

	// enterpriseAgent embeds fields that we only access in consul-enterprise builds
	enterpriseAgent
}

// New process the desired options and creates a new Agent.
// This process will
//   * parse the config given the config Flags
//   * setup logging
//      * using predefined logger given in an option
//        OR
//      * initialize a new logger from the configuration
//        including setting up gRPC logging
//   * initialize telemetry
//   * create a TLS Configurator
//   * build a shared connection pool
//   * create the ServiceManager
//   * setup the NodeID if one isn't provided in the configuration
//   * create the AutoConfig object for future use in fully
//     resolving the configuration
func New(bd BaseDeps) (*Agent, error) {
	a := Agent{
		checkReapAfter:  make(map[structs.CheckID]time.Duration),
		checkMonitors:   make(map[structs.CheckID]*checks.CheckMonitor),
		checkTTLs:       make(map[structs.CheckID]*checks.CheckTTL),
		checkHTTPs:      make(map[structs.CheckID]*checks.CheckHTTP),
		checkH2PINGs:    make(map[structs.CheckID]*checks.CheckH2PING),
		checkTCPs:       make(map[structs.CheckID]*checks.CheckTCP),
		checkGRPCs:      make(map[structs.CheckID]*checks.CheckGRPC),
		checkDockers:    make(map[structs.CheckID]*checks.CheckDocker),
		checkAliases:    make(map[structs.CheckID]*checks.CheckAlias),
		eventCh:         make(chan serf.UserEvent, 1024),
		eventBuf:        make([]*UserEvent, 256),
		joinLANNotifier: &systemd.Notifier{},
		retryJoinCh:     make(chan error),
		shutdownCh:      make(chan struct{}),
		endpoints:       make(map[string]string),
		stateLock:       mutex.New(),

		baseDeps:        bd,
		tokens:          bd.Tokens,
		logger:          bd.Logger,
		tlsConfigurator: bd.TLSConfigurator,
		config:          bd.RuntimeConfig,
		cache:           bd.Cache,
		routineManager:  routine.NewManager(bd.Logger),
	}

	// TODO: create rpcClientHealth in BaseDeps once NetRPC is available without Agent
	conn, err := bd.GRPCConnPool.ClientConn(bd.RuntimeConfig.Datacenter)
	if err != nil {
		return nil, err
	}

	a.rpcClientHealth = &health.Client{
		Cache:     bd.Cache,
		NetRPC:    &a,
		CacheName: cachetype.HealthServicesName,
		ViewStore: bd.ViewStore,
		MaterializerDeps: health.MaterializerDeps{
			Conn:   conn,
			Logger: bd.Logger.Named("rpcclient.health"),
		},
		UseStreamingBackend: a.config.UseStreamingBackend,
		QueryOptionDefaults: config.ApplyDefaultQueryOptions(a.config),
	}

	a.serviceManager = NewServiceManager(&a)

	// We used to do this in the Start method. However it doesn't need to go
	// there any longer. Originally it did because we passed the agent
	// delegate to some of the cache registrations. Now we just
	// pass the agent itself so its safe to move here.
	a.registerCache()

	// TODO: why do we ignore failure to load persisted tokens?
	_ = a.tokens.Load(bd.RuntimeConfig.ACLTokens, a.logger)

	// TODO: pass in a fully populated apiServers into Agent.New
	a.apiServers = NewAPIServers(a.logger)

	return &a, nil
}

// GetConfig retrieves the agents config
// TODO make export the config field and get rid of this method
// This is here for now to simplify the work I am doing and make
// reviewing the final PR easier.
func (a *Agent) GetConfig() *config.RuntimeConfig {
	a.stateLock.Lock()
	defer a.stateLock.Unlock()
	return a.config
}

// LocalConfig takes a config.RuntimeConfig and maps the fields to a local.Config
func LocalConfig(cfg *config.RuntimeConfig) local.Config {
	lc := local.Config{
		AdvertiseAddr:       cfg.AdvertiseAddrLAN.String(),
		CheckUpdateInterval: cfg.CheckUpdateInterval,
		Datacenter:          cfg.Datacenter,
		DiscardCheckOutput:  cfg.DiscardCheckOutput,
		NodeID:              cfg.NodeID,
		NodeName:            cfg.NodeName,
		Partition:           cfg.PartitionOrDefault(),
		TaggedAddresses:     map[string]string{},
	}
	for k, v := range cfg.TaggedAddresses {
		lc.TaggedAddresses[k] = v
	}
	return lc
}

// Start verifies its configuration and runs an agent's various subprocesses.
func (a *Agent) Start(ctx context.Context) error {
	a.stateLock.Lock()
	defer a.stateLock.Unlock()

	// This needs to be done early on as it will potentially alter the configuration
	// and then how other bits are brought up
	c, err := a.baseDeps.AutoConfig.InitialConfiguration(ctx)
	if err != nil {
		return err
	}

	// copy over the existing node id, this cannot be
	// changed while running anyways but this prevents
	// breaking some existing behavior. then overwrite
	// the configuration
	c.NodeID = a.config.NodeID
	a.config = c

	if err := a.tlsConfigurator.Update(a.config.ToTLSUtilConfig()); err != nil {
		return fmt.Errorf("Failed to load TLS configurations after applying auto-config settings: %w", err)
	}

	if err := a.startLicenseManager(ctx); err != nil {
		return err
	}

	// create the local state
	a.State = local.NewState(LocalConfig(c), a.logger, a.tokens)

	// create the state synchronization manager which performs
	// regular and on-demand state synchronizations (anti-entropy).
	a.sync = ae.NewStateSyncer(a.State, c.AEInterval, a.shutdownCh, a.logger)

	// create the config for the rpc server/client
	consulCfg, err := newConsulConfig(a.config, a.logger)
	if err != nil {
		return err
	}

	// Setup the user event callback
	consulCfg.UserEventHandler = func(e serf.UserEvent) {
		select {
		case a.eventCh <- e:
		case <-a.shutdownCh:
		}
	}

	// ServerUp is used to inform that a new consul server is now
	// up. This can be used to speed up the sync process if we are blocking
	// waiting to discover a consul server
	consulCfg.ServerUp = a.sync.SyncFull.Trigger

	err = a.initEnterprise(consulCfg)
	if err != nil {
		return fmt.Errorf("failed to start Consul enterprise component: %v", err)
	}

	// Setup either the client or the server.
	if c.ServerMode {
		server, err := consul.NewServer(consulCfg, a.baseDeps.Deps)
		if err != nil {
			return fmt.Errorf("Failed to start Consul server: %v", err)
		}
		a.delegate = server
	} else {
		client, err := consul.NewClient(consulCfg, a.baseDeps.Deps)
		if err != nil {
			return fmt.Errorf("Failed to start Consul client: %v", err)
		}
		a.delegate = client
	}

	// The staggering of the state syncing depends on the cluster size.
	//
	// NOTE: we will use the agent's canonical serf pool for this since that's
	// similarly scoped with the state store side of anti-entropy.
	a.sync.ClusterSize = func() int { return len(a.delegate.LANMembersInAgentPartition()) }

	// link the state with the consul server/client and the state syncer
	// via callbacks. After several attempts this was easier than using
	// channels since the event notification needs to be non-blocking
	// and that should be hidden in the state syncer implementation.
	a.State.Delegate = a.delegate
	a.State.TriggerSyncChanges = a.sync.SyncChanges.Trigger

	if err := a.baseDeps.AutoConfig.Start(&lib.StopChannelContext{StopCh: a.shutdownCh}); err != nil {
		return fmt.Errorf("AutoConf failed to start certificate monitor: %w", err)
	}

	// Load checks/services/metadata.
	emptyCheckSnapshot := map[structs.CheckID]*structs.HealthCheck{}
	if err := a.loadServices(c, emptyCheckSnapshot); err != nil {
		return err
	}
	if err := a.loadChecks(c, nil); err != nil {
		return err
	}
	if err := a.loadMetadata(c); err != nil {
		return err
	}

	var intentionDefaultAllow bool
	switch a.config.ACLResolverSettings.ACLDefaultPolicy {
	case "allow":
		intentionDefaultAllow = true
	case "deny":
		intentionDefaultAllow = false
	default:
		return fmt.Errorf("unexpected ACL default policy value of %q", a.config.ACLResolverSettings.ACLDefaultPolicy)
	}

	go a.baseDeps.ViewStore.Run(&lib.StopChannelContext{StopCh: a.shutdownCh})

	// Start the proxy config manager.
	a.proxyConfig, err = proxycfg.NewManager(proxycfg.ManagerConfig{
		Cache:  a.cache,
		Health: a.rpcClientHealth,
		Logger: a.logger.Named(logging.ProxyConfig),
		State:  a.State,
		Tokens: a.baseDeps.Tokens,
		Source: &structs.QuerySource{
			Datacenter:    a.config.Datacenter,
			Segment:       a.config.SegmentName,
			NodePartition: a.config.PartitionOrEmpty(),
		},
		DNSConfig: proxycfg.DNSConfig{
			Domain:    a.config.DNSDomain,
			AltDomain: a.config.DNSAltDomain,
		},
		TLSConfigurator:       a.tlsConfigurator,
		IntentionDefaultAllow: intentionDefaultAllow,
	})
	if err != nil {
		return err
	}
	go func() {
		if err := a.proxyConfig.Run(); err != nil {
			a.logger.Error("proxy config manager exited with error", "error", err)
		}
	}()

	// Start watching for critical services to deregister, based on their
	// checks.
	go a.reapServices()

	// Start handling events.
	go a.handleEvents()

	// Start sending network coordinate to the server.
	if !c.DisableCoordinates {
		go a.sendCoordinate()
	}

	// Write out the PID file if necessary.
	if err := a.storePid(); err != nil {
		return err
	}

	// start DNS servers
	if err := a.listenAndServeDNS(); err != nil {
		return err
	}

	// Configure the http connection limiter.
	a.httpConnLimiter.SetConfig(connlimit.Config{
		MaxConnsPerClientIP: a.config.HTTPMaxConnsPerClient,
	})

	// Create listeners and unstarted servers; see comment on listenHTTP why
	// we are doing this.
	servers, err := a.listenHTTP()
	if err != nil {
		return err
	}

	// Start HTTP and HTTPS servers.
	for _, srv := range servers {
		a.apiServers.Start(srv)
	}

	// Start gRPC server.
	if err := a.listenAndServeGRPC(); err != nil {
		return err
	}

	// register watches
	if err := a.reloadWatches(a.config); err != nil {
		return err
	}

	// start retry join
	go a.retryJoinLAN()
	if a.config.ServerMode {
		go a.retryJoinWAN()
	}

	// DEPRECATED: Warn users if they're emitting deprecated metrics. Remove this warning and the flagged metrics in a
	// future release of Consul.
	if !a.config.Telemetry.DisableCompatOneNine {
		a.logger.Warn("DEPRECATED Backwards compatibility with pre-1.9 metrics enabled. These metrics will be removed in a future version of Consul. Set `telemetry { disable_compat_1.9 = true }` to disable them.")
	}

	if a.tlsConfigurator.Cert() != nil {
		m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger)
		go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
	}

	// consul version metric with labels
	metrics.SetGaugeWithLabels([]string{"version"}, 1, []metrics.Label{
		{Name: "version", Value: a.config.Version},
		{Name: "pre_release", Value: a.config.VersionPrerelease},
	})

	return nil
}

var Gauges = []prometheus.GaugeDefinition{
	{
		Name: []string{"version"},
		Help: "Represents the Consul version.",
	},
}

// Failed returns a channel which is closed when the first server goroutine exits
// with a non-nil error.
func (a *Agent) Failed() <-chan struct{} {
	return a.apiServers.failed
}

func (a *Agent) listenAndServeGRPC() error {
	if len(a.config.GRPCAddrs) < 1 {
		return nil
	}

	xdsServer := xds.NewServer(
		a.logger.Named(logging.Envoy),
		a.proxyConfig,
		func(id string) (acl.Authorizer, error) {
			return a.delegate.ResolveTokenAndDefaultMeta(id, nil, nil)
		},
		a,
		a,
	)

	tlsConfig := a.tlsConfigurator
	// gRPC uses the same TLS settings as the HTTPS API. If HTTPS is not enabled
	// then gRPC should not use TLS.
	if a.config.HTTPSPort <= 0 {
		tlsConfig = nil
	}
	var err error
	a.grpcServer = xds.NewGRPCServer(xdsServer, tlsConfig)

	ln, err := a.startListeners(a.config.GRPCAddrs)
	if err != nil {
		return err
	}

	for _, l := range ln {
		go func(innerL net.Listener) {
			a.logger.Info("Started gRPC server",
				"address", innerL.Addr().String(),
				"network", innerL.Addr().Network(),
			)
			err := a.grpcServer.Serve(innerL)
			if err != nil {
				a.logger.Error("gRPC server failed", "error", err)
			}
		}(l)
	}
	return nil
}

func (a *Agent) listenAndServeDNS() error {
	notif := make(chan net.Addr, len(a.config.DNSAddrs))
	errCh := make(chan error, len(a.config.DNSAddrs))
	for _, addr := range a.config.DNSAddrs {
		// create server
		s, err := NewDNSServer(a)
		if err != nil {
			return err
		}
		a.dnsServers = append(a.dnsServers, s)

		// start server
		a.wgServers.Add(1)
		go func(addr net.Addr) {
			defer a.wgServers.Done()
			err := s.ListenAndServe(addr.Network(), addr.String(), func() { notif <- addr })
			if err != nil && !strings.Contains(err.Error(), "accept") {
				errCh <- err
			}
		}(addr)
	}

	// wait for servers to be up
	timeout := time.After(time.Second)
	var merr *multierror.Error
	for range a.config.DNSAddrs {
		select {
		case addr := <-notif:
			a.logger.Info("Started DNS server",
				"address", addr.String(),
				"network", addr.Network(),
			)

		case err := <-errCh:
			merr = multierror.Append(merr, err)
		case <-timeout:
			merr = multierror.Append(merr, fmt.Errorf("agent: timeout starting DNS servers"))
			return merr.ErrorOrNil()
		}
	}
	return merr.ErrorOrNil()
}

func (a *Agent) startListeners(addrs []net.Addr) ([]net.Listener, error) {
	var ln []net.Listener
	for _, addr := range addrs {
		var l net.Listener
		var err error

		switch x := addr.(type) {
		case *net.UnixAddr:
			l, err = a.listenSocket(x.Name)
			if err != nil {
				return nil, err
			}

		case *net.TCPAddr:
			l, err = net.Listen("tcp", x.String())
			if err != nil {
				return nil, err
			}
			l = &tcpKeepAliveListener{l.(*net.TCPListener)}

		default:
			return nil, fmt.Errorf("unsupported address type %T", addr)
		}
		ln = append(ln, l)
	}
	return ln, nil
}

// listenHTTP binds listeners to the provided addresses and also returns
// pre-configured HTTP servers which are not yet started. The motivation is
// that in the current startup/shutdown setup we de-couple the listener
// creation from the server startup assuming that if any of the listeners
// cannot be bound we fail immediately and later failures do not occur.
// Therefore, starting a server with a running listener is assumed to not
// produce an error.
//
// The second motivation is that an HTTPS server needs to use the same TLSConfig
// on both the listener and the HTTP server. When listeners and servers are
// created at different times this becomes difficult to handle without keeping
// the TLS configuration somewhere or recreating it.
//
// This approach should ultimately be refactored to the point where we just
// start the server and any error should trigger a proper shutdown of the agent.
func (a *Agent) listenHTTP() ([]apiServer, error) {
	var ln []net.Listener
	var servers []apiServer

	start := func(proto string, addrs []net.Addr) error {
		listeners, err := a.startListeners(addrs)
		if err != nil {
			return err
		}
		ln = append(ln, listeners...)

		for _, l := range listeners {
			var tlscfg *tls.Config
			_, isTCP := l.(*tcpKeepAliveListener)
			if isTCP && proto == "https" {
				tlscfg = a.tlsConfigurator.IncomingHTTPSConfig()
				l = tls.NewListener(l, tlscfg)
			}

			srv := &HTTPHandlers{
				agent:    a,
				denylist: NewDenylist(a.config.HTTPBlockEndpoints),
			}
			a.configReloaders = append(a.configReloaders, srv.ReloadConfig)
			a.httpHandlers = srv
			httpServer := &http.Server{
				Addr:           l.Addr().String(),
				TLSConfig:      tlscfg,
				Handler:        srv.handler(a.config.EnableDebug),
				MaxHeaderBytes: a.config.HTTPMaxHeaderBytes,
			}

			// Load the connlimit helper into the server
			connLimitFn := a.httpConnLimiter.HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond)

			if proto == "https" {
				if err := setupHTTPS(httpServer, connLimitFn, a.config.HTTPSHandshakeTimeout); err != nil {
					return err
				}
			} else {
				httpServer.ConnState = connLimitFn
			}

			servers = append(servers, newAPIServerHTTP(proto, l, httpServer))
		}
		return nil
	}

	if err := start("http", a.config.HTTPAddrs); err != nil {
		closeListeners(ln)
		return nil, err
	}
	if err := start("https", a.config.HTTPSAddrs); err != nil {
		closeListeners(ln)
		return nil, err
	}
	return servers, nil
}

func closeListeners(lns []net.Listener) {
	for _, l := range lns {
		l.Close()
	}
}

// setupHTTPS adds HTTP/2 support, ConnState, and a connection handshake timeout
// to the http.Server.
func setupHTTPS(server *http.Server, connState func(net.Conn, http.ConnState), timeout time.Duration) error {
	// Enforce TLS handshake timeout
	server.ConnState = func(conn net.Conn, state http.ConnState) {
		switch state {
		case http.StateNew:
			// Set deadline to prevent slow send before TLS handshake or first
			// byte of request.
			conn.SetReadDeadline(time.Now().Add(timeout))
		case http.StateActive:
			// Clear read deadline. We should maybe set read timeouts more
			// generally but that's a bigger task as some HTTP endpoints may
			// stream large requests and responses (e.g. snapshot) so we can't
			// set sensible blanket timeouts here.
			conn.SetReadDeadline(time.Time{})
		}
		// Pass through to conn limit. This is OK because we didn't change
		// state (i.e. Close conn).
		connState(conn, state)
	}

	// This will enable upgrading connections to HTTP/2 as
	// part of TLS negotiation.
	return http2.ConfigureServer(server, nil)
}

// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
// connections. It's used so dead TCP connections eventually go away.
type tcpKeepAliveListener struct {
	*net.TCPListener
}

func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
	tc, err := ln.AcceptTCP()
	if err != nil {
		return
	}
	tc.SetKeepAlive(true)
	tc.SetKeepAlivePeriod(30 * time.Second)
	return tc, nil
}

func (a *Agent) listenSocket(path string) (net.Listener, error) {
	if _, err := os.Stat(path); !os.IsNotExist(err) {
		a.logger.Warn("Replacing socket", "path", path)
	}
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("error removing socket file: %s", err)
	}
	l, err := net.Listen("unix", path)
	if err != nil {
		return nil, err
	}
	user, group, mode := a.config.UnixSocketUser, a.config.UnixSocketGroup, a.config.UnixSocketMode
	if err := setFilePermissions(path, user, group, mode); err != nil {
		return nil, fmt.Errorf("Failed setting up socket: %s", err)
	}
	return l, nil
}

// stopAllWatches stops all the currently running watches
func (a *Agent) stopAllWatches() {