2018-10-03 18:18:55 +00:00
|
|
|
package xds
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"sync/atomic"
|
2019-01-11 15:43:18 +00:00
|
|
|
"time"
|
2018-10-03 18:18:55 +00:00
|
|
|
|
2021-02-26 22:23:15 +00:00
|
|
|
envoy_config_core_v3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
|
|
|
|
envoy_discovery_v2 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v2"
|
|
|
|
envoy_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
|
2021-02-22 21:00:15 +00:00
|
|
|
|
2021-05-14 18:59:13 +00:00
|
|
|
"github.com/armon/go-metrics"
|
|
|
|
"github.com/armon/go-metrics/prometheus"
|
2020-01-28 23:50:41 +00:00
|
|
|
"github.com/hashicorp/go-hclog"
|
2020-06-23 20:19:56 +00:00
|
|
|
"google.golang.org/grpc"
|
|
|
|
"google.golang.org/grpc/codes"
|
|
|
|
"google.golang.org/grpc/credentials"
|
|
|
|
"google.golang.org/grpc/metadata"
|
|
|
|
"google.golang.org/grpc/status"
|
2020-12-23 17:50:28 +00:00
|
|
|
|
|
|
|
"github.com/hashicorp/consul/acl"
|
|
|
|
"github.com/hashicorp/consul/agent/proxycfg"
|
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
2021-04-29 18:54:05 +00:00
|
|
|
"github.com/hashicorp/consul/logging"
|
2020-12-23 17:50:28 +00:00
|
|
|
"github.com/hashicorp/consul/tlsutil"
|
2018-10-03 18:18:55 +00:00
|
|
|
)
|
|
|
|
|
2021-05-14 18:59:13 +00:00
|
|
|
var StatsGauges = []prometheus.GaugeDefinition{
|
|
|
|
{
|
|
|
|
Name: []string{"xds", "server", "streams"},
|
|
|
|
Help: "Measures the number of active xDS streams handled by the server split by protocol version.",
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// ADSStream is a shorter way of referring to this thing...
|
2021-02-26 22:23:15 +00:00
|
|
|
type ADSStream = envoy_discovery_v3.AggregatedDiscoveryService_StreamAggregatedResourcesServer
|
|
|
|
type ADSStream_v2 = envoy_discovery_v2.AggregatedDiscoveryService_StreamAggregatedResourcesServer
|
2018-10-03 18:18:55 +00:00
|
|
|
|
|
|
|
const (
	// apiTypePrefix is prepended to every protobuf message name to form an
	// xDS type URL. The v3 resource type names below are copied from
	// envoyproxy/go-control-plane/pkg/resource/v3/resource.go because nothing
	// else from that package is needed here.
	apiTypePrefix = "type.googleapis.com/"

	// EndpointType is the TypeURL for Endpoint discovery responses
	// (EndpointType_v2 is the xDS v2 equivalent).
	EndpointType    = apiTypePrefix + "envoy.config.endpoint.v3.ClusterLoadAssignment"
	EndpointType_v2 = apiTypePrefix + "envoy.api.v2.ClusterLoadAssignment"

	// ClusterType is the TypeURL for Cluster discovery responses
	// (ClusterType_v2 is the xDS v2 equivalent).
	ClusterType    = apiTypePrefix + "envoy.config.cluster.v3.Cluster"
	ClusterType_v2 = apiTypePrefix + "envoy.api.v2.Cluster"

	// RouteType is the TypeURL for Route discovery responses
	// (RouteType_v2 is the xDS v2 equivalent).
	RouteType    = apiTypePrefix + "envoy.config.route.v3.RouteConfiguration"
	RouteType_v2 = apiTypePrefix + "envoy.api.v2.RouteConfiguration"

	// ListenerType is the TypeURL for Listener discovery responses
	// (ListenerType_v2 is the xDS v2 equivalent).
	ListenerType    = apiTypePrefix + "envoy.config.listener.v3.Listener"
	ListenerType_v2 = apiTypePrefix + "envoy.api.v2.Listener"

	// PublicListenerName is the name we give the public listener in Envoy
	// config.
	PublicListenerName = "public_listener"

	// OutboundListenerName is the name we give the outbound Envoy listener
	// when transparent proxy mode is enabled.
	OutboundListenerName = "outbound_listener"

	// LocalAppClusterName is the name we give the local application "cluster"
	// in Envoy config. Cluster names intentionally share a namespace with
	// service names so metrics correlate without prefix massaging, which means
	// this name can collide with a real service.
	//
	// Making a collision less likely would mean changing the value, and that
	// potentially breaks upgrade compatibility unless every Envoy is restarted
	// (the name would no longer match their existing cluster). It would also
	// change metrics output and could break dashboards for local app traffic.
	//
	// If anyone actually has a service named "local_app" in the future, the
	// name should probably just become configurable.
	LocalAppClusterName = "local_app"

	// LocalAgentClusterName is the name we give the local agent "cluster" in
	// Envoy config. Cluster names intentionally share a namespace with service
	// names so metrics correlate without prefix massaging, which means this
	// name can collide with a real service.
	//
	// Making a collision less likely would mean changing the value, and that
	// potentially breaks upgrade compatibility unless every Envoy is restarted
	// (the name would no longer match their existing cluster). It would also
	// change metrics output and could break dashboards for local agent
	// traffic.
	//
	// If anyone actually has a service named "local_agent" in the future, the
	// name should probably just become configurable.
	LocalAgentClusterName = "local_agent"

	// OriginalDestinationClusterName is the name we give to the passthrough
	// cluster which redirects transparently-proxied requests to their original
	// destination outside the mesh. This cluster prevents Consul from blocking
	// connections to destinations outside of the catalog when in transparent
	// proxy mode.
	OriginalDestinationClusterName = "original-destination"

	// DefaultAuthCheckFrequency is the default value for
	// Server.AuthCheckFrequency to use when the zero value is provided.
	DefaultAuthCheckFrequency = 5 * time.Minute
)
|
|
|
|
|
|
|
|
// ACLResolverFunc is a shim to resolve ACLs. Since ACL enforcement is so far
|
|
|
|
// entirely agent-local and all uses private methods this allows a simple shim
|
|
|
|
// to be written in the agent package to allow resolving without tightly
|
|
|
|
// coupling this to the agent.
|
2018-10-19 16:04:07 +00:00
|
|
|
type ACLResolverFunc func(id string) (acl.Authorizer, error)
|
2018-10-03 18:18:55 +00:00
|
|
|
|
2019-09-26 02:55:52 +00:00
|
|
|
// ServiceChecks is the interface the agent needs to expose
|
|
|
|
// for the xDS server to fetch a service's HTTP check definitions
|
|
|
|
type HTTPCheckFetcher interface {
|
2019-12-10 02:26:41 +00:00
|
|
|
ServiceHTTPBasedChecks(serviceID structs.ServiceID) []structs.CheckType
|
2019-09-26 02:55:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// ConfigFetcher is the interface the agent needs to expose for the xDS server
// to fetch agent config. Currently only a single field is fetched.
type ConfigFetcher interface {
	// AdvertiseAddrLAN returns the agent's LAN advertise address.
	AdvertiseAddrLAN() string
}
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// ConfigManager is the interface xds.Server requires to consume proxy config
|
|
|
|
// updates. It's satisfied normally by the agent's proxycfg.Manager, but allows
|
|
|
|
// easier testing without several layers of mocked cache, local state and
|
|
|
|
// proxycfg.Manager.
|
|
|
|
type ConfigManager interface {
|
2020-01-24 15:04:58 +00:00
|
|
|
Watch(proxyID structs.ServiceID) (<-chan *proxycfg.ConfigSnapshot, proxycfg.CancelFunc)
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
|
2020-08-27 17:20:58 +00:00
|
|
|
// Server represents a gRPC server that can handle xDS requests from Envoy. All
|
|
|
|
// of it's public members must be set before the gRPC server is started.
|
2018-10-03 18:18:55 +00:00
|
|
|
//
|
|
|
|
// A full description of the XDS protocol can be found at
|
2019-06-03 16:03:05 +00:00
|
|
|
// https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol
|
2018-10-03 18:18:55 +00:00
|
|
|
type Server struct {
|
2020-01-28 23:50:41 +00:00
|
|
|
Logger hclog.Logger
|
2018-10-03 18:18:55 +00:00
|
|
|
CfgMgr ConfigManager
|
|
|
|
ResolveToken ACLResolverFunc
|
2021-04-29 18:54:05 +00:00
|
|
|
CheckFetcher HTTPCheckFetcher
|
|
|
|
CfgFetcher ConfigFetcher
|
|
|
|
|
2019-01-11 15:43:18 +00:00
|
|
|
// AuthCheckFrequency is how often we should re-check the credentials used
|
|
|
|
// during a long-lived gRPC Stream after it has been initially established.
|
|
|
|
// This is only used during idle periods of stream interactions (i.e. when
|
|
|
|
// there has been no recent DiscoveryRequest).
|
|
|
|
AuthCheckFrequency time.Duration
|
2021-02-26 22:23:15 +00:00
|
|
|
|
|
|
|
DisableV2Protocol bool
|
2021-05-14 18:59:13 +00:00
|
|
|
|
|
|
|
activeStreams activeStreamCounters
|
|
|
|
}
|
|
|
|
|
|
|
|
// activeStreamCounters simply encapsulates two counters accessed atomically to
// ensure alignment is correct. There is one counter per xDS protocol version.
type activeStreamCounters struct {
	xDSv3 uint64 // number of active "v3" streams
	xDSv2 uint64 // number of active "v2" streams
}
|
|
|
|
|
|
|
|
func (c *activeStreamCounters) Increment(xdsVersion string) func() {
|
|
|
|
var counter *uint64
|
|
|
|
switch xdsVersion {
|
|
|
|
case "v3":
|
|
|
|
counter = &c.xDSv3
|
|
|
|
case "v2":
|
|
|
|
counter = &c.xDSv2
|
|
|
|
default:
|
|
|
|
return func() {}
|
|
|
|
}
|
|
|
|
|
|
|
|
labels := []metrics.Label{{Name: "version", Value: xdsVersion}}
|
|
|
|
|
|
|
|
count := atomic.AddUint64(counter, 1)
|
|
|
|
metrics.SetGaugeWithLabels([]string{"xds", "server", "streams"}, float32(count), labels)
|
|
|
|
return func() {
|
|
|
|
count := atomic.AddUint64(counter, ^uint64(0))
|
|
|
|
metrics.SetGaugeWithLabels([]string{"xds", "server", "streams"}, float32(count), labels)
|
|
|
|
}
|
2019-01-11 15:43:18 +00:00
|
|
|
}
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
func NewServer(
|
|
|
|
logger hclog.Logger,
|
|
|
|
cfgMgr ConfigManager,
|
|
|
|
resolveToken ACLResolverFunc,
|
|
|
|
checkFetcher HTTPCheckFetcher,
|
|
|
|
cfgFetcher ConfigFetcher,
|
|
|
|
) *Server {
|
|
|
|
return &Server{
|
|
|
|
Logger: logger,
|
|
|
|
CfgMgr: cfgMgr,
|
|
|
|
ResolveToken: resolveToken,
|
|
|
|
CheckFetcher: checkFetcher,
|
|
|
|
CfgFetcher: cfgFetcher,
|
|
|
|
AuthCheckFrequency: DefaultAuthCheckFrequency,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// StreamAggregatedResources implements
|
2021-02-26 22:23:15 +00:00
|
|
|
// envoy_discovery_v3.AggregatedDiscoveryServiceServer. This is the ADS endpoint which is
|
2018-10-03 18:18:55 +00:00
|
|
|
// the only xDS API we directly support for now.
|
2021-04-29 18:54:05 +00:00
|
|
|
//
|
|
|
|
// Deprecated: use DeltaAggregatedResources instead
|
2018-10-03 18:18:55 +00:00
|
|
|
func (s *Server) StreamAggregatedResources(stream ADSStream) error {
|
2021-04-29 18:54:05 +00:00
|
|
|
return errors.New("not implemented")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Deprecated: remove when xDS v2 is no longer supported
|
|
|
|
func (s *Server) streamAggregatedResources(stream ADSStream) error {
|
2021-05-14 18:59:13 +00:00
|
|
|
defer s.activeStreams.Increment("v2")()
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
// Note: despite dealing entirely in v3 protobufs, this function is
|
|
|
|
// exclusively used from the xDS v2 shim RPC handler, so the logging below
|
|
|
|
// will refer to it as "v2".
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// a channel for receiving incoming requests
|
2021-02-26 22:23:15 +00:00
|
|
|
reqCh := make(chan *envoy_discovery_v3.DiscoveryRequest)
|
2018-10-03 18:18:55 +00:00
|
|
|
reqStop := int32(0)
|
|
|
|
go func() {
|
|
|
|
for {
|
|
|
|
req, err := stream.Recv()
|
|
|
|
if atomic.LoadInt32(&reqStop) != 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
close(reqCh)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
reqCh <- req
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
err := s.process(stream, reqCh)
|
|
|
|
if err != nil {
|
2021-04-29 18:54:05 +00:00
|
|
|
s.Logger.Error("Error handling ADS stream", "xdsVersion", "v2", "error", err)
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// prevents writing to a closed channel if send failed on blocked recv
|
|
|
|
atomic.StoreInt32(&reqStop, 1)
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// States of the small per-stream state machine run by (*Server).process.
const (
	// stateInit: no request has been handled yet, so no config watch exists.
	stateInit int = iota
	// statePendingInitialConfig: the config watch was started and the first
	// snapshot has not arrived yet.
	statePendingInitialConfig
	// stateRunning: a snapshot is available; ACLs are checked and resources
	// are sent on every event.
	stateRunning
)
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
// Deprecated: remove when xDS v2 is no longer supported
|
2021-02-26 22:23:15 +00:00
|
|
|
func (s *Server) process(stream ADSStream, reqCh <-chan *envoy_discovery_v3.DiscoveryRequest) error {
|
2018-10-03 18:18:55 +00:00
|
|
|
// xDS requires a unique nonce to correlate response/request pairs
|
|
|
|
var nonce uint64
|
|
|
|
|
|
|
|
// xDS works with versions of configs. Internally we don't have a consistent
|
2020-03-27 20:08:25 +00:00
|
|
|
// version. We could hash the config since versions don't have to be
|
|
|
|
// ordered as far as I can tell, but it is cheaper to increment a counter
|
2018-10-03 18:18:55 +00:00
|
|
|
// every time we observe a new config since the upstream proxycfg package only
|
|
|
|
// delivers updates when there are actual changes.
|
|
|
|
var configVersion uint64
|
|
|
|
|
|
|
|
// Loop state
|
2020-07-09 22:04:51 +00:00
|
|
|
var (
|
2021-04-29 18:54:05 +00:00
|
|
|
cfgSnap *proxycfg.ConfigSnapshot
|
|
|
|
req *envoy_discovery_v3.DiscoveryRequest
|
|
|
|
node *envoy_config_core_v3.Node
|
|
|
|
ok bool
|
|
|
|
stateCh <-chan *proxycfg.ConfigSnapshot
|
|
|
|
watchCancel func()
|
|
|
|
proxyID structs.ServiceID
|
|
|
|
)
|
|
|
|
|
|
|
|
generator := newResourceGenerator(
|
|
|
|
s.Logger.Named(logging.XDS).With("xdsVersion", "v2"),
|
|
|
|
s.CheckFetcher,
|
|
|
|
s.CfgFetcher,
|
|
|
|
false,
|
2020-07-09 22:04:51 +00:00
|
|
|
)
|
2018-10-03 18:18:55 +00:00
|
|
|
|
|
|
|
// need to run a small state machine to get through initial authentication.
|
|
|
|
var state = stateInit
|
|
|
|
|
|
|
|
// Configure handlers for each type of request
|
|
|
|
handlers := map[string]*xDSType{
|
2020-03-27 20:08:25 +00:00
|
|
|
EndpointType: {
|
2021-04-29 18:54:05 +00:00
|
|
|
generator: generator,
|
2018-10-03 18:18:55 +00:00
|
|
|
typeURL: EndpointType,
|
|
|
|
stream: stream,
|
|
|
|
},
|
2020-03-27 20:08:25 +00:00
|
|
|
ClusterType: {
|
2021-04-29 18:54:05 +00:00
|
|
|
generator: generator,
|
2018-10-03 18:18:55 +00:00
|
|
|
typeURL: ClusterType,
|
|
|
|
stream: stream,
|
2019-11-26 21:55:13 +00:00
|
|
|
allowEmptyFn: func(cfgSnap *proxycfg.ConfigSnapshot) bool {
|
2020-05-07 21:19:25 +00:00
|
|
|
// Mesh, Ingress, and Terminating gateways are allowed to inform CDS of
|
|
|
|
// no clusters.
|
|
|
|
return cfgSnap.Kind == structs.ServiceKindMeshGateway ||
|
|
|
|
cfgSnap.Kind == structs.ServiceKindTerminatingGateway ||
|
|
|
|
cfgSnap.Kind == structs.ServiceKindIngressGateway
|
2019-11-26 21:55:13 +00:00
|
|
|
},
|
2018-10-03 18:18:55 +00:00
|
|
|
},
|
2020-03-27 20:08:25 +00:00
|
|
|
RouteType: {
|
2021-04-29 18:54:05 +00:00
|
|
|
generator: generator,
|
2018-10-03 18:18:55 +00:00
|
|
|
typeURL: RouteType,
|
|
|
|
stream: stream,
|
2020-05-07 21:19:25 +00:00
|
|
|
allowEmptyFn: func(cfgSnap *proxycfg.ConfigSnapshot) bool {
|
|
|
|
return cfgSnap.Kind == structs.ServiceKindIngressGateway
|
|
|
|
},
|
2018-10-03 18:18:55 +00:00
|
|
|
},
|
2020-03-27 20:08:25 +00:00
|
|
|
ListenerType: {
|
2021-04-29 18:54:05 +00:00
|
|
|
generator: generator,
|
2018-10-03 18:18:55 +00:00
|
|
|
typeURL: ListenerType,
|
|
|
|
stream: stream,
|
2020-05-07 21:19:25 +00:00
|
|
|
allowEmptyFn: func(cfgSnap *proxycfg.ConfigSnapshot) bool {
|
|
|
|
return cfgSnap.Kind == structs.ServiceKindIngressGateway
|
|
|
|
},
|
2018-10-03 18:18:55 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2019-01-11 15:43:18 +00:00
|
|
|
var authTimer <-chan time.Time
|
|
|
|
extendAuthTimer := func() {
|
|
|
|
authTimer = time.After(s.AuthCheckFrequency)
|
|
|
|
}
|
|
|
|
|
|
|
|
checkStreamACLs := func(cfgSnap *proxycfg.ConfigSnapshot) error {
|
2021-08-13 15:53:19 +00:00
|
|
|
return s.authorize(stream.Context(), cfgSnap)
|
2019-01-11 15:43:18 +00:00
|
|
|
}
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
for {
|
|
|
|
select {
|
2019-01-11 15:43:18 +00:00
|
|
|
case <-authTimer:
|
|
|
|
// It's been too long since a Discovery{Request,Response} so recheck ACLs.
|
|
|
|
if err := checkStreamACLs(cfgSnap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
extendAuthTimer()
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
case req, ok = <-reqCh:
|
|
|
|
if !ok {
|
|
|
|
// reqCh is closed when stream.Recv errors which is how we detect client
|
2019-03-06 17:13:28 +00:00
|
|
|
// going away. AFAICT the stream.Context() is only canceled once the
|
2018-10-03 18:18:55 +00:00
|
|
|
// RPC method returns which it can't until we return from this one so
|
|
|
|
// there's no point in blocking on that.
|
|
|
|
return nil
|
|
|
|
}
|
2021-04-29 18:54:05 +00:00
|
|
|
|
|
|
|
generator.logTraceRequest("SOTW xDS v2", req)
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
if req.TypeUrl == "" {
|
|
|
|
return status.Errorf(codes.InvalidArgument, "type URL is required for ADS")
|
|
|
|
}
|
2020-07-09 22:04:51 +00:00
|
|
|
|
|
|
|
if node == nil && req.Node != nil {
|
|
|
|
node = req.Node
|
2020-07-31 20:52:49 +00:00
|
|
|
var err error
|
2021-04-29 18:54:05 +00:00
|
|
|
generator.ProxyFeatures, err = determineSupportedProxyFeatures(req.Node)
|
2020-07-31 20:52:49 +00:00
|
|
|
if err != nil {
|
|
|
|
return status.Errorf(codes.InvalidArgument, err.Error())
|
|
|
|
}
|
2020-07-09 22:04:51 +00:00
|
|
|
}
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
if handler, ok := handlers[req.TypeUrl]; ok {
|
2021-04-29 18:54:05 +00:00
|
|
|
handler.Recv(req, node)
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
case cfgSnap = <-stateCh:
|
|
|
|
// We got a new config, update the version counter
|
|
|
|
configVersion++
|
|
|
|
}
|
|
|
|
|
|
|
|
// Trigger state machine
|
|
|
|
switch state {
|
|
|
|
case stateInit:
|
|
|
|
if req == nil {
|
|
|
|
// This can't happen (tm) since stateCh is nil until after the first req
|
|
|
|
// is received but lets not panic about it.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Start authentication process, we need the proxyID
|
2020-01-24 15:04:58 +00:00
|
|
|
proxyID = structs.NewServiceID(req.Node.Id, parseEnterpriseMeta(req.Node))
|
2018-10-03 18:18:55 +00:00
|
|
|
|
|
|
|
// Start watching config for that proxy
|
|
|
|
stateCh, watchCancel = s.CfgMgr.Watch(proxyID)
|
|
|
|
// Note that in this case we _intend_ the defer to only be triggered when
|
|
|
|
// this whole process method ends (i.e. when streaming RPC aborts) not at
|
|
|
|
// the end of the current loop iteration. We have to do it in the loop
|
|
|
|
// here since we can't start watching until we get to this state in the
|
|
|
|
// state machine.
|
|
|
|
defer watchCancel()
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
generator.Logger.Trace("watching proxy, pending initial proxycfg snapshot",
|
2021-02-05 22:15:52 +00:00
|
|
|
"service_id", proxyID.String())
|
2021-02-02 19:26:38 +00:00
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// Now wait for the config so we can check ACL
|
2019-01-11 15:43:18 +00:00
|
|
|
state = statePendingInitialConfig
|
|
|
|
case statePendingInitialConfig:
|
2018-10-03 18:18:55 +00:00
|
|
|
if cfgSnap == nil {
|
|
|
|
// Nothing we can do until we get the initial config
|
|
|
|
continue
|
|
|
|
}
|
2019-01-11 15:43:18 +00:00
|
|
|
|
|
|
|
// Got config, try to authenticate next.
|
2018-10-03 18:18:55 +00:00
|
|
|
state = stateRunning
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
// Upgrade the logger based on Kind.
|
|
|
|
switch cfgSnap.Kind {
|
|
|
|
case structs.ServiceKindConnectProxy:
|
|
|
|
case structs.ServiceKindTerminatingGateway:
|
|
|
|
generator.Logger = generator.Logger.Named(logging.TerminatingGateway)
|
|
|
|
case structs.ServiceKindMeshGateway:
|
|
|
|
generator.Logger = generator.Logger.Named(logging.MeshGateway)
|
|
|
|
case structs.ServiceKindIngressGateway:
|
|
|
|
generator.Logger = generator.Logger.Named(logging.IngressGateway)
|
|
|
|
}
|
|
|
|
|
|
|
|
generator.Logger.Trace("Got initial config snapshot",
|
2021-02-05 22:15:52 +00:00
|
|
|
"service_id", cfgSnap.ProxyID.String())
|
2021-02-02 19:26:38 +00:00
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// Lets actually process the config we just got or we'll mis responding
|
|
|
|
fallthrough
|
|
|
|
case stateRunning:
|
2019-01-11 15:43:18 +00:00
|
|
|
// Check ACLs on every Discovery{Request,Response}.
|
|
|
|
if err := checkStreamACLs(cfgSnap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// For the first time through the state machine, this is when the
|
|
|
|
// timer is first started.
|
|
|
|
extendAuthTimer()
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
generator.Logger.Trace("Invoking all xDS resource handlers and sending new data if there is any",
|
2021-02-05 22:15:52 +00:00
|
|
|
"service_id", cfgSnap.ProxyID.String())
|
2021-02-02 19:26:38 +00:00
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
// See if any handlers need to have the current (possibly new) config
|
|
|
|
// sent. Note the order here is actually significant so we can't just
|
|
|
|
// range the map which has no determined order. It's important because:
|
|
|
|
//
|
|
|
|
// 1. Envoy needs to see a consistent snapshot to avoid potentially
|
|
|
|
// dropping traffic due to inconsistencies. This is the
|
|
|
|
// main win of ADS after all - we get to control this order.
|
|
|
|
// 2. Non-determinsic order of complex protobuf responses which are
|
|
|
|
// compared for non-exact JSON equivalence makes the tests uber-messy
|
|
|
|
// to handle
|
|
|
|
for _, typeURL := range []string{ClusterType, EndpointType, RouteType, ListenerType} {
|
|
|
|
handler := handlers[typeURL]
|
|
|
|
if err := handler.SendIfNew(cfgSnap, configVersion, &nonce); err != nil {
|
2021-04-29 18:54:05 +00:00
|
|
|
return status.Errorf(codes.Unavailable,
|
|
|
|
"failed to send reply for type %q: %v",
|
|
|
|
typeURL, err)
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
// Deprecated: remove when xDS v2 is no longer supported
|
2018-10-03 18:18:55 +00:00
|
|
|
type xDSType struct {
|
2021-04-29 18:54:05 +00:00
|
|
|
generator *ResourceGenerator
|
|
|
|
typeURL string
|
|
|
|
stream ADSStream
|
|
|
|
req *envoy_discovery_v3.DiscoveryRequest
|
|
|
|
node *envoy_config_core_v3.Node
|
|
|
|
lastNonce string
|
2018-10-03 18:18:55 +00:00
|
|
|
// lastVersion is the version that was last sent to the proxy. It is needed
|
|
|
|
// because we don't want to send the same version more than once.
|
|
|
|
// req.VersionInfo may be an older version than the most recent once sent in
|
|
|
|
// two cases: 1) if the ACK wasn't received yet and `req` still points to the
|
|
|
|
// previous request we already responded to and 2) if the proxy rejected the
|
|
|
|
// last version we sent with a Nack then req.VersionInfo will be the older
|
|
|
|
// version it's hanging on to.
|
2019-11-26 21:55:13 +00:00
|
|
|
lastVersion uint64
|
|
|
|
allowEmptyFn func(cfgSnap *proxycfg.ConfigSnapshot) bool
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
func (t *xDSType) Recv(req *envoy_discovery_v3.DiscoveryRequest, node *envoy_config_core_v3.Node) {
|
2018-10-03 18:18:55 +00:00
|
|
|
if t.lastNonce == "" || t.lastNonce == req.GetResponseNonce() {
|
|
|
|
t.req = req
|
2020-07-09 22:04:51 +00:00
|
|
|
t.node = node
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *xDSType) SendIfNew(cfgSnap *proxycfg.ConfigSnapshot, version uint64, nonce *uint64) error {
|
|
|
|
if t.req == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if t.lastVersion >= version {
|
|
|
|
// Already sent this version
|
|
|
|
return nil
|
|
|
|
}
|
2020-07-09 22:04:51 +00:00
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
resources, err := t.generator.resourcesFromSnapshot(t.typeURL, cfgSnap)
|
2018-10-03 18:18:55 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-11-26 21:55:13 +00:00
|
|
|
|
|
|
|
allowEmpty := t.allowEmptyFn != nil && t.allowEmptyFn(cfgSnap)
|
|
|
|
|
2019-03-22 19:37:14 +00:00
|
|
|
// Zero length resource responses should be ignored and are the result of no
|
|
|
|
// data yet. Notice that this caused a bug originally where we had zero
|
|
|
|
// healthy endpoints for an upstream that would cause Envoy to hang waiting
|
|
|
|
// for the EDS response. This is fixed though by ensuring we send an explicit
|
|
|
|
// empty LoadAssignment resource for the cluster rather than allowing junky
|
|
|
|
// empty resources.
|
2019-11-26 21:55:13 +00:00
|
|
|
if len(resources) == 0 && !allowEmpty {
|
2018-10-03 18:18:55 +00:00
|
|
|
// Nothing to send yet
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note we only increment nonce when we actually send - not important for
|
|
|
|
// correctness but makes tests much simpler when we skip a type like Routes
|
|
|
|
// with nothing to send.
|
|
|
|
*nonce++
|
|
|
|
nonceStr := fmt.Sprintf("%08x", *nonce)
|
|
|
|
versionStr := fmt.Sprintf("%08x", version)
|
|
|
|
|
|
|
|
resp, err := createResponse(t.typeURL, versionStr, nonceStr, resources)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-04-29 18:54:05 +00:00
|
|
|
t.generator.logTraceResponse("SOTW xDS v2", resp)
|
|
|
|
|
2018-10-03 18:18:55 +00:00
|
|
|
err = t.stream.Send(resp)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.lastVersion = version
|
|
|
|
t.lastNonce = nonceStr
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func tokenFromContext(ctx context.Context) string {
|
|
|
|
md, ok := metadata.FromIncomingContext(ctx)
|
|
|
|
if !ok {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
toks, ok := md["x-consul-token"]
|
|
|
|
if ok && len(toks) > 0 {
|
|
|
|
return toks[0]
|
|
|
|
}
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
2021-07-09 16:17:45 +00:00
|
|
|
// NewGRPCServer creates a grpc.Server, registers the Server, and then returns
|
|
|
|
// the grpc.Server.
|
|
|
|
func NewGRPCServer(s *Server, tlsConfigurator *tlsutil.Configurator) *grpc.Server {
|
2018-10-03 18:18:55 +00:00
|
|
|
opts := []grpc.ServerOption{
|
|
|
|
grpc.MaxConcurrentStreams(2048),
|
|
|
|
}
|
2020-01-22 10:32:17 +00:00
|
|
|
if tlsConfigurator != nil {
|
|
|
|
if tlsConfigurator.Cert() != nil {
|
2021-07-09 16:17:45 +00:00
|
|
|
creds := credentials.NewTLS(tlsConfigurator.IncomingXDSConfig())
|
2020-01-22 10:32:17 +00:00
|
|
|
opts = append(opts, grpc.Creds(creds))
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
srv := grpc.NewServer(opts...)
|
2021-02-26 22:23:15 +00:00
|
|
|
envoy_discovery_v3.RegisterAggregatedDiscoveryServiceServer(srv, s)
|
|
|
|
|
|
|
|
if !s.DisableV2Protocol {
|
|
|
|
envoy_discovery_v2.RegisterAggregatedDiscoveryServiceServer(srv, &adsServerV2Shim{srv: s})
|
|
|
|
}
|
2021-07-09 16:17:45 +00:00
|
|
|
return srv
|
2018-10-03 18:18:55 +00:00
|
|
|
}
|
2021-04-29 18:54:05 +00:00
|
|
|
|
2021-08-13 15:53:19 +00:00
|
|
|
// authorize the xDS request using the token stored in ctx. This authorization is
|
|
|
|
// a bit different from most interfaces. Instead of explicitly authorizing or
|
|
|
|
// filtering each piece of data in the response, the request is authorized
|
|
|
|
// by checking the token has `service:write` for the service ID of the destination
|
|
|
|
// service (for kind=ConnectProxy), or the gateway service (for other kinds).
|
|
|
|
// This authorization strategy requires that agent/proxycfg only fetches data
|
|
|
|
// using a token with the same permissions, and that it stores the data by
|
|
|
|
// proxy ID. We assume that any data in the snapshot was already filtered,
|
|
|
|
// which allows this authorization to be a shallow authorization check
|
|
|
|
// for all the data in a ConfigSnapshot.
|
|
|
|
func (s *Server) authorize(ctx context.Context, cfgSnap *proxycfg.ConfigSnapshot) error {
|
2021-04-29 18:54:05 +00:00
|
|
|
if cfgSnap == nil {
|
|
|
|
return status.Errorf(codes.Unauthenticated, "unauthenticated: no config snapshot")
|
|
|
|
}
|
|
|
|
|
2021-08-13 15:53:19 +00:00
|
|
|
authz, err := s.ResolveToken(tokenFromContext(ctx))
|
2021-04-29 18:54:05 +00:00
|
|
|
if acl.IsErrNotFound(err) {
|
|
|
|
return status.Errorf(codes.Unauthenticated, "unauthenticated: %v", err)
|
|
|
|
} else if acl.IsErrPermissionDenied(err) {
|
|
|
|
return status.Errorf(codes.PermissionDenied, "permission denied: %v", err)
|
|
|
|
} else if err != nil {
|
|
|
|
return status.Errorf(codes.Internal, "error resolving acl token: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var authzContext acl.AuthorizerContext
|
|
|
|
switch cfgSnap.Kind {
|
|
|
|
case structs.ServiceKindConnectProxy:
|
|
|
|
cfgSnap.ProxyID.EnterpriseMeta.FillAuthzContext(&authzContext)
|
2021-08-04 21:51:19 +00:00
|
|
|
if authz.ServiceWrite(cfgSnap.Proxy.DestinationServiceName, &authzContext) != acl.Allow {
|
2021-04-29 18:54:05 +00:00
|
|
|
return status.Errorf(codes.PermissionDenied, "permission denied")
|
|
|
|
}
|
|
|
|
case structs.ServiceKindMeshGateway, structs.ServiceKindTerminatingGateway, structs.ServiceKindIngressGateway:
|
|
|
|
cfgSnap.ProxyID.EnterpriseMeta.FillAuthzContext(&authzContext)
|
2021-08-04 21:51:19 +00:00
|
|
|
if authz.ServiceWrite(cfgSnap.Service, &authzContext) != acl.Allow {
|
2021-04-29 18:54:05 +00:00
|
|
|
return status.Errorf(codes.PermissionDenied, "permission denied")
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return status.Errorf(codes.Internal, "Invalid service kind")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Authed OK!
|
|
|
|
return nil
|
|
|
|
}
|