open-consul/agent/proxycfg/mesh_gateway.go

311 lines
10 KiB
Go

package proxycfg
import (
"context"
"fmt"
"strings"
"time"
"github.com/hashicorp/consul/agent/cache"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging"
)
// handlerMeshGateway provides the initialize and handleUpdate logic used for
// mesh gateway proxies. It declares no fields of its own: everything its
// methods read through the receiver (cache, health, token, source, ch, ...)
// is promoted from the embedded handlerState.
type handlerMeshGateway struct {
handlerState
}
// initialize builds the starting snapshot for a mesh gateway and registers the
// watches whose results handleUpdate later folds into that snapshot.
//
// Watches are registered in a fixed order (CA roots, service list, the optional
// WAN federation pair, datacenters, service-resolvers). The first registration
// that fails aborts the sequence and its error is returned together with the
// snapshot as built up to that point.
func (s *handlerMeshGateway) initialize(ctx context.Context) (ConfigSnapshot, error) {
	snap := newConfigSnapshotFromServiceInstance(s.serviceInstance, s.stateConfig)

	localDC := s.source.Datacenter
	tokenOnly := structs.QueryOptions{Token: s.token}

	// CA roots for the local datacenter.
	rootsReq := &structs.DCSpecificRequest{
		Datacenter:   localDC,
		QueryOptions: tokenOnly,
		Source:       *s.source,
	}
	if err := s.cache.Notify(ctx, cachetype.ConnectCARootName, rootsReq, rootsWatchID, s.ch); err != nil {
		return snap, err
	}

	wildcardEntMeta := s.proxyID.WildcardEnterpriseMetaForPartition()

	// Every service in the catalog, using the wildcard enterprise meta.
	serviceListReq := &structs.DCSpecificRequest{
		Datacenter:     localDC,
		QueryOptions:   tokenOnly,
		Source:         *s.source,
		EnterpriseMeta: *wildcardEntMeta,
	}
	if err := s.cache.Notify(ctx, cachetype.CatalogServiceListName, serviceListReq, serviceListWatchID, s.ch); err != nil {
		return snap, err
	}

	// This service meta attribute is the single switch that enables the WAN
	// federation watches; the rest of the package stays free of the
	// conditional.
	if s.meta[structs.MetaWANFederationKey] == "1" {
		fedStateReq := &structs.DCSpecificRequest{
			Datacenter:   localDC,
			QueryOptions: tokenOnly,
			Source:       *s.source,
		}
		if err := s.cache.Notify(ctx, cachetype.FederationStateListMeshGatewaysName, fedStateReq, federationStateListGatewaysWatchID, s.ch); err != nil {
			return snap, err
		}

		serversReq := structs.ServiceSpecificRequest{
			Datacenter:   localDC,
			QueryOptions: tokenOnly,
			ServiceName:  structs.ConsulServiceName,
		}
		if err := s.health.Notify(ctx, serversReq, consulServerListWatchID, s.ch); err != nil {
			return snap, err
		}
	}

	// The per-service watches on connect-enabled instances cannot be set up
	// here: the set of services is only known once the service list watch
	// above delivers its first result, so handleUpdate registers them.
	datacentersReq := &structs.DatacentersRequest{
		QueryOptions: structs.QueryOptions{Token: s.token, MaxAge: 30 * time.Second},
	}
	if err := s.cache.Notify(ctx, cachetype.CatalogDatacentersName, datacentersReq, datacentersWatchID, s.ch); err != nil {
		return snap, err
	}

	// Likewise, watches on the gateways of the other datacenters are deferred
	// until the datacenter list arrives, because the datacenters are not known
	// yet.

	// Service-resolvers are watched so that service subset clusters can be
	// set up.
	resolversReq := &structs.ConfigEntryQuery{
		Datacenter:     localDC,
		QueryOptions:   tokenOnly,
		Kind:           structs.ServiceResolver,
		EnterpriseMeta: *wildcardEntMeta,
	}
	if err := s.cache.Notify(ctx, cachetype.ConfigEntriesName, resolversReq, serviceResolversWatchID, s.ch); err != nil {
		s.logger.Named(logging.MeshGateway).
			Error("failed to register watch for service-resolver config entries", "error", err)
		return snap, err
	}

	// Allocate every map handleUpdate writes into so it never hits a nil map.
	// ServiceResolvers is replaced wholesale on each update, but it is
	// allocated here as well so the snapshot starts with a non-nil map.
	snap.MeshGateway.WatchedServices = make(map[structs.ServiceName]context.CancelFunc)
	snap.MeshGateway.WatchedDatacenters = make(map[string]context.CancelFunc)
	snap.MeshGateway.ServiceGroups = make(map[structs.ServiceName]structs.CheckServiceNodes)
	snap.MeshGateway.GatewayGroups = make(map[string]structs.CheckServiceNodes)
	snap.MeshGateway.ServiceResolvers = make(map[structs.ServiceName]*structs.ServiceResolverConfigEntry)
	snap.MeshGateway.HostnameDatacenters = make(map[string]structs.CheckServiceNodes)

	return snap, nil
}
// handleUpdate applies one watch notification to snap, dispatching on the
// event's CorrelationID:
//
//   - rootsWatchID: replaces snap.Roots.
//   - federationStateListGatewaysWatchID: replaces FedStateGateways and
//     recomputes HostnameDatacenters for the reported datacenters, dropping
//     entries for datacenters that are no longer reported.
//   - serviceListWatchID: starts a connect health watch for every newly listed
//     service and cancels the watches of services that disappeared, also
//     discarding their ServiceGroups entries.
//   - datacentersWatchID: starts a mesh gateway watch for every newly listed
//     remote datacenter and cancels the watches of datacenters that
//     disappeared.
//   - serviceResolversWatchID: rebuilds ServiceResolvers from scratch.
//   - consulServerListWatchID: validates and stores the consul server list.
//   - "connect-service:<name>": stores or clears the service's ServiceGroups
//     entry.
//   - "mesh-gateway:<dc>": stores or clears the datacenter's GatewayGroups and
//     HostnameDatacenters entries.
//
// Any other correlation ID is ignored. An error is returned when the event
// itself carries an error (wrapped, so errors.Is/As see the cause), when the
// result has an unexpected type, when a sanity check on the result fails, or
// when a follow-up watch cannot be registered.
func (s *handlerMeshGateway) handleUpdate(ctx context.Context, u cache.UpdateEvent, snap *ConfigSnapshot) error {
	if u.Err != nil {
		return fmt.Errorf("error filling agent cache: %w", u.Err)
	}

	meshLogger := s.logger.Named(logging.MeshGateway)

	switch u.CorrelationID {
	case rootsWatchID:
		roots, ok := u.Result.(*structs.IndexedCARoots)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}
		snap.Roots = roots

	case federationStateListGatewaysWatchID:
		dcIndexedNodes, ok := u.Result.(*structs.DatacenterIndexedCheckServiceNodes)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}
		snap.MeshGateway.FedStateGateways = dcIndexedNodes.DatacenterNodes

		for dc, nodes := range dcIndexedNodes.DatacenterNodes {
			snap.MeshGateway.HostnameDatacenters[dc] = hostnameEndpoints(meshLogger, snap.Datacenter, nodes)
		}

		// Forget datacenters that are absent from this result.
		for dc := range snap.MeshGateway.HostnameDatacenters {
			if _, ok := dcIndexedNodes.DatacenterNodes[dc]; !ok {
				delete(snap.MeshGateway.HostnameDatacenters, dc)
			}
		}

	case serviceListWatchID:
		services, ok := u.Result.(*structs.IndexedServiceList)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}

		svcMap := make(map[structs.ServiceName]struct{}, len(services.Services))
		for _, svc := range services.Services {
			// Every listed service goes into svcMap, watched already or not:
			// the cancellation pass below relies on it being complete.
			svcMap[svc] = struct{}{}

			if _, ok := snap.MeshGateway.WatchedServices[svc]; ok {
				continue
			}

			// Each watch gets its own cancellable child of the handler
			// context so it can be torn down individually later.
			watchCtx, cancel := context.WithCancel(ctx)
			err := s.health.Notify(watchCtx, structs.ServiceSpecificRequest{
				Datacenter:     s.source.Datacenter,
				QueryOptions:   structs.QueryOptions{Token: s.token},
				ServiceName:    svc.Name,
				Connect:        true,
				EnterpriseMeta: svc.EnterpriseMeta,
			}, fmt.Sprintf("connect-service:%s", svc.String()), s.ch)
			if err != nil {
				meshLogger.Error("failed to register watch for connect-service",
					"service", svc.String(),
					"error", err,
				)
				cancel()
				return err
			}
			snap.MeshGateway.WatchedServices[svc] = cancel
		}

		for sid, cancelFn := range snap.MeshGateway.WatchedServices {
			if _, ok := svcMap[sid]; ok {
				continue
			}
			meshLogger.Debug("canceling watch for service", "service", sid.String())
			delete(snap.MeshGateway.WatchedServices, sid)
			cancelFn()

			// With the watch cancelled no further "connect-service:" update
			// will arrive to clear this entry, so drop it here; otherwise the
			// last known endpoints would linger in the snapshot.
			delete(snap.MeshGateway.ServiceGroups, sid)
		}

		snap.MeshGateway.WatchedServicesSet = true

	case datacentersWatchID:
		datacentersRaw, ok := u.Result.(*[]string)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}
		if datacentersRaw == nil {
			return fmt.Errorf("invalid response with a nil datacenter list")
		}
		datacenters := *datacentersRaw

		// Set of every reported datacenter (the local one included), used by
		// the cancellation pass below instead of rescanning the slice.
		current := make(map[string]struct{}, len(datacenters))
		for _, dc := range datacenters {
			current[dc] = struct{}{}

			// The local datacenter never gets a remote gateway watch.
			if dc == s.source.Datacenter {
				continue
			}
			if _, ok := snap.MeshGateway.WatchedDatacenters[dc]; ok {
				continue
			}

			watchCtx, cancel := context.WithCancel(ctx)
			err := s.cache.Notify(watchCtx, cachetype.InternalServiceDumpName, &structs.ServiceDumpRequest{
				Datacenter:     dc,
				QueryOptions:   structs.QueryOptions{Token: s.token},
				ServiceKind:    structs.ServiceKindMeshGateway,
				UseServiceKind: true,
				Source:         *s.source,
				EnterpriseMeta: *structs.DefaultEnterpriseMetaInDefaultPartition(),
			}, fmt.Sprintf("mesh-gateway:%s", dc), s.ch)
			if err != nil {
				meshLogger.Error("failed to register watch for mesh-gateway",
					"datacenter", dc,
					"error", err,
				)
				cancel()
				return err
			}
			snap.MeshGateway.WatchedDatacenters[dc] = cancel
		}

		for dc, cancelFn := range snap.MeshGateway.WatchedDatacenters {
			if _, ok := current[dc]; ok {
				continue
			}
			delete(snap.MeshGateway.WatchedDatacenters, dc)
			cancelFn()
		}

	case serviceResolversWatchID:
		configEntries, ok := u.Result.(*structs.IndexedConfigEntries)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}

		// Rebuilt from scratch on every update; entries of any other concrete
		// type are skipped.
		resolvers := make(map[structs.ServiceName]*structs.ServiceResolverConfigEntry)
		for _, entry := range configEntries.Entries {
			if resolver, ok := entry.(*structs.ServiceResolverConfigEntry); ok {
				resolvers[structs.NewServiceName(resolver.Name, &resolver.EnterpriseMeta)] = resolver
			}
		}
		snap.MeshGateway.ServiceResolvers = resolvers

	case consulServerListWatchID:
		resp, ok := u.Result.(*structs.IndexedCheckServiceNodes)
		if !ok {
			return fmt.Errorf("invalid type for response: %T", u.Result)
		}

		// Sanity checks: reject the whole result if any node is for a
		// different service or a different datacenter than expected.
		for _, csn := range resp.Nodes {
			if csn.Service.Service != structs.ConsulServiceName {
				return fmt.Errorf("expected service name %q but got %q",
					structs.ConsulServiceName, csn.Service.Service)
			}
			if csn.Node.Datacenter != snap.Datacenter {
				return fmt.Errorf("expected datacenter %q but got %q",
					snap.Datacenter, csn.Node.Datacenter)
			}
		}

		snap.MeshGateway.ConsulServers = resp.Nodes

	default:
		// Dynamically registered watches carry their subject in the
		// correlation ID, after a fixed prefix.
		switch {
		case strings.HasPrefix(u.CorrelationID, "connect-service:"):
			resp, ok := u.Result.(*structs.IndexedCheckServiceNodes)
			if !ok {
				return fmt.Errorf("invalid type for response: %T", u.Result)
			}

			sn := structs.ServiceNameFromString(strings.TrimPrefix(u.CorrelationID, "connect-service:"))

			if len(resp.Nodes) > 0 {
				snap.MeshGateway.ServiceGroups[sn] = resp.Nodes
			} else {
				// delete is a no-op when the key is absent.
				delete(snap.MeshGateway.ServiceGroups, sn)
			}

		case strings.HasPrefix(u.CorrelationID, "mesh-gateway:"):
			resp, ok := u.Result.(*structs.IndexedNodesWithGateways)
			if !ok {
				return fmt.Errorf("invalid type for response: %T", u.Result)
			}

			dc := strings.TrimPrefix(u.CorrelationID, "mesh-gateway:")

			// Clear first so an empty result leaves no stale entries behind.
			delete(snap.MeshGateway.GatewayGroups, dc)
			delete(snap.MeshGateway.HostnameDatacenters, dc)

			if len(resp.Nodes) > 0 {
				snap.MeshGateway.GatewayGroups[dc] = resp.Nodes
				snap.MeshGateway.HostnameDatacenters[dc] = hostnameEndpoints(meshLogger, snap.Datacenter, resp.Nodes)
			}

		default:
			// do nothing for now
		}
	}

	return nil
}