rpcclient: Add health.Client and use it in http and dns

This new package provides a client agent implementation of an interface
for fetching the health of services.

This approach has a number of benefits:

1. It provides a much more explicit interface. Instead of everything
   dependency on `RPC()` and `Cache.Get()` for many unrelated things
   they can depend on a type that are named according to the behaviour
   it provides.

2. It gives us a single place to vary the behaviour and migrate to
   a new form of RPC (gRPC). The current implementation has two options
   (cache, or direct RPC), and in the future we will have more.
   It is also a great opporunity to start adding `context.Context` args
   to these operations, which in the future will allow us to cancel
   the operations.

3. As a concequence of the first, in the Server agent where we make
   these calls we can replace the current in-memory RPC calls with
   a thin adapter for the real method. This removes the `net/rpc`
   machinery from the call in places where it is not needed.

This new package is quite small right now, but I think we can expect it
to grow to a more reasonable size as other RPC calls are replaced.

This change also happens to replace two very similar implementations with
a single implementation.
This commit is contained in:
Daniel Nephin 2020-09-30 17:38:13 -04:00
parent a442fcd6cd
commit 04c5b9adf5
4 changed files with 101 additions and 75 deletions

View File

@ -17,15 +17,16 @@ import (
"sync"
"time"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/token"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"golang.org/x/net/http2"
"google.golang.org/grpc"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/ae"
"github.com/hashicorp/consul/agent/cache"
@ -33,10 +34,13 @@ import (
"github.com/hashicorp/consul/agent/checks"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/consul/agent/proxycfg"
"github.com/hashicorp/consul/agent/rpcclient/health"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/systemd"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/agent/xds"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/api/watch"
@ -46,10 +50,6 @@ import (
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"golang.org/x/net/http2"
)
const (
@ -311,6 +311,10 @@ type Agent struct {
// they can update their internal state.
configReloaders []ConfigReloader
// TODO: pass directly to HTTPHandlers and DNSServer once those are passed
// into Agent, which will allow us to remove this field.
rpcClientHealth *health.Client
// enterpriseAgent embeds fields that we only access in consul-enterprise builds
enterpriseAgent
}
@ -355,6 +359,8 @@ func New(bd BaseDeps) (*Agent, error) {
cache: bd.Cache,
}
a.rpcClientHealth = &health.Client{Cache: bd.Cache, NetRPC: &a}
a.serviceManager = NewServiceManager(&a)
// TODO: do this somewhere else, maybe move to newBaseDeps

View File

@ -13,6 +13,9 @@ import (
metrics "github.com/armon/go-metrics"
radix "github.com/armon/go-radix"
"github.com/coredns/coredns/plugin/pkg/dnsutil"
"github.com/hashicorp/go-hclog"
"github.com/miekg/dns"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/config"
agentdns "github.com/hashicorp/consul/agent/dns"
@ -21,8 +24,6 @@ import (
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog"
"github.com/miekg/dns"
)
const (
@ -1159,49 +1160,18 @@ func (d *DNSServer) lookupServiceNodes(cfg *dnsConfig, lookup serviceLookup) (st
ServiceTags: []string{lookup.Tag},
TagFilter: lookup.Tag != "",
QueryOptions: structs.QueryOptions{
Token: d.agent.tokens.UserToken(),
AllowStale: cfg.AllowStale,
MaxAge: cfg.CacheMaxAge,
Token: d.agent.tokens.UserToken(),
AllowStale: cfg.AllowStale,
MaxAge: cfg.CacheMaxAge,
UseCache: cfg.UseCache,
MaxStaleDuration: cfg.MaxStale,
},
EnterpriseMeta: lookup.EnterpriseMeta,
}
var out structs.IndexedCheckServiceNodes
if cfg.UseCache {
raw, m, err := d.agent.cache.Get(context.TODO(), cachetype.HealthServicesName, &args)
if err != nil {
return out, err
}
reply, ok := raw.(*structs.IndexedCheckServiceNodes)
if !ok {
// This should never happen, but we want to protect against panics
return out, fmt.Errorf("internal error: response type not correct")
}
d.logger.Trace("cache results for service",
"cache_hit", m.Hit,
"service", lookup.Service,
)
out = *reply
} else {
if err := d.agent.RPC("Health.ServiceNodes", &args, &out); err != nil {
return out, err
}
}
if args.AllowStale && out.LastContact > staleCounterThreshold {
metrics.IncrCounter([]string{"dns", "stale_queries"}, 1)
}
// redo the request the response was too stale
if args.AllowStale && out.LastContact > cfg.MaxStale {
args.AllowStale = false
d.logger.Warn("Query results too stale, re-requesting")
if err := d.agent.RPC("Health.ServiceNodes", &args, &out); err != nil {
return structs.IndexedCheckServiceNodes{}, err
}
out, _, err := d.agent.rpcClientHealth.ServiceNodes(context.TODO(), args)
if err != nil {
return out, err
}
// Filter out any service nodes due to health checks

View File

@ -7,7 +7,6 @@ import (
"strconv"
"strings"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
)
@ -220,35 +219,21 @@ func (s *HTTPHandlers) healthServiceNodes(resp http.ResponseWriter, req *http.Re
return nil, nil
}
// Make the RPC request
var out structs.IndexedCheckServiceNodes
defer setMeta(resp, &out.QueryMeta)
// TODO: handle this for all endpoints in parseConsistency
args.QueryOptions.UseCache = s.agent.config.HTTPUseCache && args.QueryOptions.UseCache
if s.agent.config.HTTPUseCache && args.QueryOptions.UseCache {
raw, m, err := s.agent.cache.Get(req.Context(), cachetype.HealthServicesName, &args)
if err != nil {
return nil, err
}
defer setCacheMeta(resp, &m)
reply, ok := raw.(*structs.IndexedCheckServiceNodes)
if !ok {
// This should never happen, but we want to protect against panics
return nil, fmt.Errorf("internal error: response type not correct")
}
out = *reply
} else {
RETRY_ONCE:
if err := s.agent.RPC("Health.ServiceNodes", &args, &out); err != nil {
return nil, err
}
if args.QueryOptions.AllowStale && args.MaxStaleDuration > 0 && args.MaxStaleDuration < out.LastContact {
args.AllowStale = false
args.MaxStaleDuration = 0
goto RETRY_ONCE
}
out, md, err := s.agent.rpcClientHealth.ServiceNodes(req.Context(), args)
if err != nil {
return nil, err
}
if args.QueryOptions.UseCache {
setCacheMeta(resp, &md)
}
setMeta(resp, &out.QueryMeta)
out.ConsistencyLevel = args.QueryOptions.ConsistencyLevel()
// FIXME: argument parsing should be done before performing the rpc
// Filter to only passing if specified
filter, err := getBoolQueryParam(params, api.HealthPassing)
if err != nil {
@ -257,6 +242,7 @@ func (s *HTTPHandlers) healthServiceNodes(resp http.ResponseWriter, req *http.Re
return nil, nil
}
// FIXME: remove filterNonPassing, replace with nodes.Filter, which is used by DNSServer
if filter {
out.Nodes = filterNonPassing(out.Nodes)
}

View File

@ -0,0 +1,64 @@
package health
import (
"context"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs"
)
type Client struct {
NetRPC NetRPC
Cache CacheGetter
}
type NetRPC interface {
RPC(method string, args interface{}, reply interface{}) error
}
type CacheGetter interface {
Get(ctx context.Context, t string, r cache.Request) (interface{}, cache.ResultMeta, error)
}
func (c *Client) ServiceNodes(
ctx context.Context,
req structs.ServiceSpecificRequest,
) (structs.IndexedCheckServiceNodes, cache.ResultMeta, error) {
out, md, err := c.getServiceNodes(ctx, req)
if err != nil {
return out, md, err
}
// TODO: DNSServer emitted a metric here, do we still need it?
if req.QueryOptions.AllowStale && req.QueryOptions.MaxStaleDuration > 0 && out.QueryMeta.LastContact > req.MaxStaleDuration {
req.AllowStale = false
err := c.NetRPC.RPC("Health.ServiceNodes", &req, &out)
return out, cache.ResultMeta{}, err
}
return out, md, err
}
func (c *Client) getServiceNodes(
ctx context.Context,
req structs.ServiceSpecificRequest,
) (structs.IndexedCheckServiceNodes, cache.ResultMeta, error) {
var out structs.IndexedCheckServiceNodes
if !req.QueryOptions.UseCache {
err := c.NetRPC.RPC("Health.ServiceNodes", &req, &out)
return out, cache.ResultMeta{}, err
}
raw, md, err := c.Cache.Get(ctx, cachetype.HealthServicesName, &req)
if err != nil {
return out, md, err
}
value, ok := raw.(*structs.IndexedCheckServiceNodes)
if !ok {
panic("wrong response type for cachetype.HealthServicesName")
}
return *value, md, nil
}