open-consul/agent/grpc-internal/resolver/resolver.go
Matt Keeler e909289454
Various bits of cleanup detected when using Go Workspaces (#17462)
TLDR with many modules the versions included in each diverged quite a bit. Attempting to use Go Workspaces produces a bunch of errors.

This commit:

1. Fixes envoy-library-references.sh to work again
2. Ensures we are pulling in go-control-plane@v0.11.0 everywhere (previously it was at that version in some modules and others were much older)
3. Remove one usage of golang/protobuf that caused us to have a direct dependency on it.
4. Remove deprecated usage of the Endpoint field in the grpc resolver.Target struct. The current version of grpc (v1.55.0) has removed that field and recommended replacement with URL.Opaque and calls to the Endpoint() func when needing to consume the previous field.
4. `go work init <all the paths to go.mod files>` && `go work sync`. This syncrhonized versions of dependencies from the main workspace/root module to all submodules
5. Updated .gitignore to ignore the go.work and go.work.sum files. This seems to be standard practice at the moment.
6. Update doc comments in protoc-gen-consul-rate-limit to be go fmt compatible
7. Upgraded makefile infra to perform linting, testing and go mod tidy on all modules in a flexible manner.
8. Updated linter rules to prevent usage of golang/protobuf
9. Updated a leader peering test to account for an extra colon in a grpc error message.
2023-06-05 16:08:39 -04:00

343 lines
9.9 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package resolver
import (
"fmt"
"strings"
"sync"
"google.golang.org/grpc/resolver"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/types"
)
// ServerResolverBuilder tracks the current server list and keeps any
// ServerResolvers updated when changes occur.
type ServerResolverBuilder struct {
cfg Config
// leaderResolver is used to track the address of the leader in the local DC.
leaderResolver leaderResolver
// servers is an index of Servers by area and Server.ID. The map contains server IDs
// for all datacenters.
servers map[types.AreaID]map[string]*metadata.Server
// resolvers is an index of connections to the serverResolver which manages
// addresses of servers for that connection.
//
// this is only applicable for non-leader conn types
resolvers map[resolver.ClientConn]*serverResolver
// lock for all stateful fields (excludes config which is immutable).
lock sync.RWMutex
}
type Config struct {
// Datacenter is the datacenter of this agent.
Datacenter string
// AgentType is either 'server' or 'client' and is required.
AgentType string
// Authority used to query the server. Defaults to "". Used to support
// parallel testing because gRPC registers resolvers globally.
Authority string
}
func NewServerResolverBuilder(cfg Config) *ServerResolverBuilder {
if cfg.Datacenter == "" {
panic("ServerResolverBuilder needs Config.Datacenter to be nonempty")
}
switch cfg.AgentType {
case "server", "client":
default:
panic("ServerResolverBuilder needs Config.AgentType to be either server or client")
}
return &ServerResolverBuilder{
cfg: cfg,
servers: make(map[types.AreaID]map[string]*metadata.Server),
resolvers: make(map[resolver.ClientConn]*serverResolver),
}
}
// ServerForGlobalAddr returns server metadata for a server with the specified globally unique address.
func (s *ServerResolverBuilder) ServerForGlobalAddr(globalAddr string) (*metadata.Server, error) {
s.lock.RLock()
defer s.lock.RUnlock()
for _, areaServers := range s.servers {
for _, server := range areaServers {
if DCPrefix(server.Datacenter, server.Addr.String()) == globalAddr {
return server, nil
}
}
}
return nil, fmt.Errorf("failed to find Consul server for global address %q", globalAddr)
}
// Build returns a new serverResolver for the given ClientConn. The resolver
// will keep the ClientConn's state updated based on updates from Serf.
func (s *ServerResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, _ resolver.BuildOptions) (resolver.Resolver, error) {
s.lock.Lock()
defer s.lock.Unlock()
// If there's already a resolver for this connection, return it.
// TODO(streaming): how would this happen since we already cache connections in ClientConnPool?
if cc == s.leaderResolver.clientConn {
return s.leaderResolver, nil
}
if resolver, ok := s.resolvers[cc]; ok {
return resolver, nil
}
//nolint:staticcheck
serverType, datacenter, err := parseEndpoint(target.Endpoint())
if err != nil {
return nil, err
}
if serverType == "leader" {
// TODO: is this safe? can we ever have multiple CC for the leader? Seems
// like we can only have one given the caching in ClientConnPool.Dial
s.leaderResolver.clientConn = cc
s.leaderResolver.updateClientConn()
return s.leaderResolver, nil
}
// Make a new resolver for the dc and add it to the list of active ones.
resolver := &serverResolver{
datacenter: datacenter,
clientConn: cc,
close: func() {
s.lock.Lock()
defer s.lock.Unlock()
delete(s.resolvers, cc)
},
}
resolver.updateAddrs(s.getDCAddrs(datacenter))
s.resolvers[cc] = resolver
return resolver, nil
}
// parseEndpoint parses a string, expecting a format of "serverType.datacenter"
func parseEndpoint(target string) (string, string, error) {
parts := strings.SplitN(target, ".", 2)
if len(parts) != 2 {
return "", "", fmt.Errorf("unexpected endpoint address: %v", target)
}
return parts[0], parts[1], nil
}
func (s *ServerResolverBuilder) Authority() string {
return s.cfg.Authority
}
// AddServer updates the resolvers' states to include the new server's address.
func (s *ServerResolverBuilder) AddServer(areaID types.AreaID, server *metadata.Server) {
if s.shouldIgnoreServer(areaID, server) {
return
}
s.lock.Lock()
defer s.lock.Unlock()
areaServers, ok := s.servers[areaID]
if !ok {
areaServers = make(map[string]*metadata.Server)
s.servers[areaID] = areaServers
}
areaServers[uniqueID(server)] = server
addrs := s.getDCAddrs(server.Datacenter)
for _, resolver := range s.resolvers {
if resolver.datacenter == server.Datacenter {
resolver.updateAddrs(addrs)
}
}
}
// uniqueID returns a unique identifier for the server which includes the
// Datacenter and the ID.
//
// In practice it is expected that the server.ID is already a globally unique
// UUID. This function is an extra safeguard in case that ever changes.
func uniqueID(server *metadata.Server) string {
return server.Datacenter + "-" + server.ID
}
// DCPrefix prefixes the given string with a datacenter for use in
// disambiguation.
func DCPrefix(datacenter, suffix string) string {
return datacenter + "-" + suffix
}
// RemoveServer updates the resolvers' states with the given server removed.
func (s *ServerResolverBuilder) RemoveServer(areaID types.AreaID, server *metadata.Server) {
if s.shouldIgnoreServer(areaID, server) {
return
}
s.lock.Lock()
defer s.lock.Unlock()
areaServers, ok := s.servers[areaID]
if !ok {
return // already gone
}
delete(areaServers, uniqueID(server))
if len(areaServers) == 0 {
delete(s.servers, areaID)
}
addrs := s.getDCAddrs(server.Datacenter)
for _, resolver := range s.resolvers {
if resolver.datacenter == server.Datacenter {
resolver.updateAddrs(addrs)
}
}
}
// shouldIgnoreServer is used to contextually decide if a particular kind of
// server should be accepted into a given area.
//
// On client agents it's pretty easy: clients only participate in the standard
// LAN, so we only accept servers from the LAN.
//
// On server agents it's a little less obvious. This resolver is ultimately
// used to have servers dial other servers. If a server is going to cross
// between datacenters (using traditional federation) then we want to use the
// WAN addresses for them, but if a server is going to dial a sibling server in
// the same datacenter we want it to use the LAN addresses always. To achieve
// that here we simply never allow WAN servers for our current datacenter to be
// added into the resolver, letting only the LAN instances through.
func (s *ServerResolverBuilder) shouldIgnoreServer(areaID types.AreaID, server *metadata.Server) bool {
if s.cfg.AgentType == "client" && areaID != types.AreaLAN {
return true
}
if s.cfg.AgentType == "server" &&
server.Datacenter == s.cfg.Datacenter &&
areaID != types.AreaLAN {
return true
}
return false
}
// getDCAddrs returns a list of the server addresses for the given datacenter.
// This method requires that lock is held for reads.
func (s *ServerResolverBuilder) getDCAddrs(dc string) []resolver.Address {
lanRequest := (s.cfg.Datacenter == dc)
var (
addrs []resolver.Address
keptServerIDs = make(map[string]struct{})
)
for areaID, areaServers := range s.servers {
if (areaID == types.AreaLAN) != lanRequest {
// LAN requests only look at LAN data. WAN requests only look at
// WAN data.
continue
}
for _, server := range areaServers {
if server.Datacenter != dc {
continue
}
// Servers may be part of multiple areas, so only include each one once.
if _, ok := keptServerIDs[server.ID]; ok {
continue
}
keptServerIDs[server.ID] = struct{}{}
addrs = append(addrs, resolver.Address{
// NOTE: the address persisted here is only dialable using our custom dialer
Addr: DCPrefix(server.Datacenter, server.Addr.String()),
ServerName: server.Name,
})
}
}
return addrs
}
// UpdateLeaderAddr updates the leader address in the local DC's resolver.
func (s *ServerResolverBuilder) UpdateLeaderAddr(datacenter, addr string) {
s.lock.Lock()
defer s.lock.Unlock()
s.leaderResolver.globalAddr = DCPrefix(datacenter, addr)
s.leaderResolver.updateClientConn()
}
// serverResolver is a grpc Resolver that will keep a grpc.ClientConn up to date
// on the list of server addresses to use.
type serverResolver struct {
// datacenter that can be reached by the clientConn. Used by ServerResolverBuilder
// to filter resolvers for those in a specific datacenter.
datacenter string
// clientConn that this resolver is providing addresses for.
clientConn resolver.ClientConn
// close is used by ServerResolverBuilder to remove this resolver from the
// index of resolvers. It is called by grpc when the connection is closed.
close func()
// addrs stores the list of addresses passed to updateAddrs, so that they
// can be rebalanced periodically by ServerResolverBuilder.
addrs []resolver.Address
addrLock sync.Mutex
}
var _ resolver.Resolver = (*serverResolver)(nil)
// updateAddrs updates this serverResolver's ClientConn to use the given set of
// addrs.
func (r *serverResolver) updateAddrs(addrs []resolver.Address) {
r.addrLock.Lock()
defer r.addrLock.Unlock()
r.clientConn.UpdateState(resolver.State{Addresses: addrs})
r.addrs = addrs
}
func (r *serverResolver) Close() {
r.close()
}
// ResolveNow is not used
func (*serverResolver) ResolveNow(options resolver.ResolveNowOptions) {}
type leaderResolver struct {
globalAddr string
clientConn resolver.ClientConn
}
func (l leaderResolver) ResolveNow(resolver.ResolveNowOptions) {}
func (l leaderResolver) Close() {}
func (l leaderResolver) updateClientConn() {
if l.globalAddr == "" || l.clientConn == nil {
return
}
addrs := []resolver.Address{
{
// NOTE: the address persisted here is only dialable using our custom dialer
Addr: l.globalAddr,
ServerName: "leader",
},
}
l.clientConn.UpdateState(resolver.State{Addresses: addrs})
}
var _ resolver.Resolver = (*leaderResolver)(nil)