open-consul/connect/resolver.go
Paul Banks 92fe8c8e89 Add Proxy Upstreams to Service Definition (#4639)
* Refactor Service Definition ProxyDestination.

This includes:
 - Refactoring all internal structs used
 - Updated tests for both deprecated and new input for:
   - Agent Services endpoint response
   - Agent Service endpoint response
   - Agent Register endpoint
     - Unmanaged deprecated field
     - Unmanaged new fields
     - Managed deprecated upstreams
     - Managed new
   - Catalog Register
     - Unmanaged deprecated field
     - Unmanaged new fields
     - Managed deprecated upstreams
     - Managed new
   - Catalog Services endpoint response
   - Catalog Node endpoint response
   - Catalog Service endpoint response
 - Updated API tests for all of the above too (both deprecated and new forms of register)

TODO:
 - config package changes for on-disk service definitions
 - proxy config endpoint
 - built-in proxy support for new fields

* Agent proxy config endpoint updated with upstreams

* Config file changes for upstreams.

* Add upstream opaque config and update all tests to ensure it works everywhere.

* Built in proxy working with new Upstreams config

* Command fixes and deprecations

* Fix key translation, upstream type defaults and a spate of other subtele bugs found with ned to end test scripts...

TODO: tests still failing on one case that needs a fix. I think it's key translation for upstreams nested in Managed proxy struct.

* Fix translated keys in API registration.
≈

* Fixes from docs
 - omit some empty undocumented fields in API
 - Bring back ServiceProxyDestination in Catalog responses to not break backwards compat - this was removed assuming it was only used internally.

* Documentation updates for Upstreams in service definition

* Fixes for tests broken by many refactors.

* Enable travis on f-connect branch in this branch too.

* Add consistent Deprecation comments to ProxyDestination uses

* Update version number on deprecation notices, and correct upstream datacenter field with explanation in docs
2018-10-10 16:55:34 +01:00

298 lines
9.2 KiB
Go

package connect
import (
"context"
"fmt"
"math/rand"
"strings"
"sync"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/api"
)
// Resolver is the interface implemented by a service discovery mechanism to get
// the address and identity of an instance to connect to via Connect as a
// client.
type Resolver interface {
// Resolve returns a single service instance to connect to. Implementations
// may attempt to ensure the instance returned is currently available. It is
// expected that a client will re-dial on a connection failure so making an
// effort to return a different service instance each time where available
// increases reliability. The context passed can be used to impose timeouts
// which may or may not be respected by implementations that make network
// calls to resolve the service. The addr returned is a string in any valid
// form for passing directly to `net.Dial("tcp", addr)`. The certURI
// represents the identity of the service instance. It will be matched against
// the TLS certificate URI SAN presented by the server and the connection
// rejected if they don't match.
Resolve(ctx context.Context) (addr string, certURI connect.CertURI, err error)
}
// StaticResolver is a statically defined resolver. This can be used to Dial a
// known Connect endpoint without performing service discovery.
type StaticResolver struct {
// Addr is the network address (including port) of the instance. It must be
// the connect-enabled mTLS listener and may be a proxy in front of the actual
// target service process. It is a string in any valid form for passing
// directly to net.Dial("tcp", addr).
Addr string
// CertURL is the identity we expect the server to present in it's TLS
// certificate. It must be an exact URI string match or the connection will be
// rejected.
CertURI connect.CertURI
}
// Resolve implements Resolver by returning the static values.
func (sr *StaticResolver) Resolve(ctx context.Context) (string, connect.CertURI, error) {
return sr.Addr, sr.CertURI, nil
}
const (
// ConsulResolverTypeService indicates resolving healthy service nodes.
ConsulResolverTypeService int = iota
// ConsulResolverTypePreparedQuery indicates resolving via prepared query.
ConsulResolverTypePreparedQuery
)
// ConsulResolver queries Consul for a service instance.
type ConsulResolver struct {
// Client is the Consul API client to use. Must be non-nil or Resolve will
// panic.
Client *api.Client
// Namespace of the query target.
Namespace string
// Name of the query target.
Name string
// Type of the query target. Should be one of the defined ConsulResolverType*
// constants. Currently defaults to ConsulResolverTypeService.
Type int
// Datacenter to resolve in, empty indicates agent's local DC.
Datacenter string
// trustDomain stores the cluster's trust domain it's populated once on first
// Resolve call and blocks all resolutions.
trustDomain string
trustDomainMu sync.Mutex
}
// Resolve performs service discovery against the local Consul agent and returns
// the address and expected identity of a suitable service instance.
func (cr *ConsulResolver) Resolve(ctx context.Context) (string, connect.CertURI, error) {
// Fetch trust domain if we've not done that yet
err := cr.ensureTrustDomain()
if err != nil {
return "", nil, err
}
switch cr.Type {
case ConsulResolverTypeService:
return cr.resolveService(ctx)
case ConsulResolverTypePreparedQuery:
return cr.resolveQuery(ctx)
default:
return "", nil, fmt.Errorf("unknown resolver type")
}
}
func (cr *ConsulResolver) ensureTrustDomain() error {
cr.trustDomainMu.Lock()
defer cr.trustDomainMu.Unlock()
if cr.trustDomain != "" {
return nil
}
roots, _, err := cr.Client.Agent().ConnectCARoots(nil)
if err != nil {
return fmt.Errorf("failed fetching cluster trust domain: %s", err)
}
if roots.TrustDomain == "" {
return fmt.Errorf("cluster trust domain empty, connect not bootstrapped")
}
cr.trustDomain = roots.TrustDomain
return nil
}
func (cr *ConsulResolver) resolveService(ctx context.Context) (string, connect.CertURI, error) {
health := cr.Client.Health()
svcs, _, err := health.Connect(cr.Name, "", true, cr.queryOptions(ctx))
if err != nil {
return "", nil, err
}
if len(svcs) < 1 {
return "", nil, fmt.Errorf("no healthy instances found")
}
// Services are not shuffled by HTTP API, pick one at (pseudo) random.
idx := 0
if len(svcs) > 1 {
idx = rand.Intn(len(svcs))
}
return cr.resolveServiceEntry(svcs[idx])
}
func (cr *ConsulResolver) resolveQuery(ctx context.Context) (string, connect.CertURI, error) {
resp, _, err := cr.Client.PreparedQuery().Execute(cr.Name, cr.queryOptions(ctx))
if err != nil {
return "", nil, err
}
svcs := resp.Nodes
if len(svcs) < 1 {
return "", nil, fmt.Errorf("no healthy instances found")
}
// Services are not shuffled by HTTP API, pick one at (pseudo) random.
idx := 0
if len(svcs) > 1 {
idx = rand.Intn(len(svcs))
}
return cr.resolveServiceEntry(&svcs[idx])
}
func (cr *ConsulResolver) resolveServiceEntry(entry *api.ServiceEntry) (string, connect.CertURI, error) {
addr := entry.Service.Address
if addr == "" {
addr = entry.Node.Address
}
port := entry.Service.Port
service := entry.Service.Proxy.DestinationServiceName
if entry.Service.Connect != nil && entry.Service.Connect.Native {
service = entry.Service.Service
}
if service == "" {
// Shouldn't happen but to protect against bugs in agent API returning bad
// service response...
return "", nil, fmt.Errorf("not a valid connect service")
}
// Generate the expected CertURI
certURI := &connect.SpiffeIDService{
Host: cr.trustDomain,
Namespace: "default",
Datacenter: entry.Node.Datacenter,
Service: service,
}
return fmt.Sprintf("%s:%d", addr, port), certURI, nil
}
func (cr *ConsulResolver) queryOptions(ctx context.Context) *api.QueryOptions {
q := &api.QueryOptions{
// We may make this configurable one day but we may also implement our own
// caching which is even more stale so...
AllowStale: true,
Datacenter: cr.Datacenter,
// For prepared queries
Connect: true,
}
return q.WithContext(ctx)
}
// ConsulResolverFromAddrFunc returns a function for constructing ConsulResolver
// from a consul DNS formatted hostname (e.g. foo.service.consul or
// foo.query.consul).
//
// Note, the returned ConsulResolver resolves the query via regular agent HTTP
// discovery API. DNS is not needed or used for discovery, only the hostname
// format re-used for consistency.
func ConsulResolverFromAddrFunc(client *api.Client) func(addr string) (Resolver, error) {
// Capture client dependency
return func(addr string) (Resolver, error) {
// Http clients might provide hostname and port
host := strings.ToLower(stripPort(addr))
// For now we force use of `.consul` TLD regardless of the configured domain
// on the cluster. That's because we don't know that domain here and it
// would be really complicated to discover it inline here. We do however
// need to be able to distingush a hostname with the optional datacenter
// segment which we can't do unambiguously if we allow arbitrary trailing
// domains.
domain := ".consul"
if !strings.HasSuffix(host, domain) {
return nil, fmt.Errorf("invalid Consul DNS domain: note Connect SDK " +
"currently requires use of .consul domain even if cluster is " +
"configured with a different domain.")
}
// Remove the domain suffix
host = host[0 : len(host)-len(domain)]
parts := strings.Split(host, ".")
numParts := len(parts)
r := &ConsulResolver{
Client: client,
Namespace: "default",
}
// Note that 3 segments may be a valid DNS name like
// <tag>.<service>.service.consul but not one we support, it might also be
// <service>.service.<datacenter>.consul which we do want to support so we
// have to figure out if the last segment is supported keyword and if not
// check if the supported keyword is further up...
// To simplify logic for now, we must match one of the following (not domain
// is stripped):
// <name>.[service|query]
// <name>.[service|query].<dc>
if numParts < 2 || numParts > 3 || !supportedTypeLabel(parts[1]) {
return nil, fmt.Errorf("unsupported Consul DNS domain: must be either " +
"<name>.service[.<datacenter>].consul or " +
"<name>.query[.<datacenter>].consul")
}
if numParts == 3 {
// Must be datacenter case
r.Datacenter = parts[2]
}
// By know we must have a supported query type which means at least 2
// elements with first 2 being name, and type respectively.
r.Name = parts[0]
switch parts[1] {
case "service":
r.Type = ConsulResolverTypeService
case "query":
r.Type = ConsulResolverTypePreparedQuery
default:
// This should never happen (tm) unless the supportedTypeLabel
// implementation is changed and this switch isn't.
return nil, fmt.Errorf("invalid discovery type")
}
return r, nil
}
}
func supportedTypeLabel(label string) bool {
return label == "service" || label == "query"
}
// stripPort copied from net/url/url.go
func stripPort(hostport string) string {
colon := strings.IndexByte(hostport, ':')
if colon == -1 {
return hostport
}
if i := strings.IndexByte(hostport, ']'); i != -1 {
return strings.TrimPrefix(hostport[:i], "[")
}
return hostport[:colon]
}