consul: do not re-register already registered services (#14917)
This PR updates Nomad's Consul service client to do map comparisons using maps.Equal instead of reflect.DeepEqual. The bug fix is in how DeepEqual treats nil slices different from empty slices, when actually they should be treated the same.
This commit is contained in:
parent
3c78980b78
commit
f1b902beac
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
consul: Fixed a bug where services continuously re-registered
|
||||||
|
```
|
|
@ -16,10 +16,12 @@ import (
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
"github.com/hashicorp/consul/api"
|
"github.com/hashicorp/consul/api"
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
|
"github.com/hashicorp/go-set"
|
||||||
"github.com/hashicorp/nomad/client/serviceregistration"
|
"github.com/hashicorp/nomad/client/serviceregistration"
|
||||||
"github.com/hashicorp/nomad/helper"
|
"github.com/hashicorp/nomad/helper"
|
||||||
"github.com/hashicorp/nomad/helper/envoy"
|
"github.com/hashicorp/nomad/helper/envoy"
|
||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -28,6 +30,12 @@ const (
|
||||||
// services (both agent and task entries).
|
// services (both agent and task entries).
|
||||||
nomadServicePrefix = "_nomad"
|
nomadServicePrefix = "_nomad"
|
||||||
|
|
||||||
|
// nomadServerPrefix is the prefix that scopes Nomad registered Servers.
|
||||||
|
nomadServerPrefix = nomadServicePrefix + "-server-"
|
||||||
|
|
||||||
|
// nomadClientPrefix is the prefix that scopes Nomad registered Clients.
|
||||||
|
nomadClientPrefix = nomadServicePrefix + "-client-"
|
||||||
|
|
||||||
// nomadTaskPrefix is the prefix that scopes Nomad registered services
|
// nomadTaskPrefix is the prefix that scopes Nomad registered services
|
||||||
// for tasks.
|
// for tasks.
|
||||||
nomadTaskPrefix = nomadServicePrefix + "-task-"
|
nomadTaskPrefix = nomadServicePrefix + "-task-"
|
||||||
|
@ -131,10 +139,7 @@ type ConfigAPI interface {
|
||||||
// ACL requirements
|
// ACL requirements
|
||||||
// - acl:write (server only)
|
// - acl:write (server only)
|
||||||
type ACLsAPI interface {
|
type ACLsAPI interface {
|
||||||
// We are looking up by [operator token] SecretID, which implies we need
|
TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error) // for lookup via operator token
|
||||||
// to use this method instead of the normal TokenRead, which can only be
|
|
||||||
// used to lookup tokens by their AccessorID.
|
|
||||||
TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error)
|
|
||||||
PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
|
PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
|
||||||
RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
|
RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
|
||||||
TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
|
TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
|
||||||
|
@ -170,6 +175,11 @@ func agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegis
|
||||||
// We do so by over-writing the nomad service registration by the value
|
// We do so by over-writing the nomad service registration by the value
|
||||||
// of the tags that Consul contains, if enable_tag_override = true.
|
// of the tags that Consul contains, if enable_tag_override = true.
|
||||||
maybeTweakTags(wanted, existing, sidecar)
|
maybeTweakTags(wanted, existing, sidecar)
|
||||||
|
|
||||||
|
// Also, purge tagged address fields of nomad agent services.
|
||||||
|
maybeTweakTaggedAddresses(wanted, existing)
|
||||||
|
|
||||||
|
// Okay now it is safe to compare.
|
||||||
return different(wanted, existing, sidecar)
|
return different(wanted, existing, sidecar)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -177,6 +187,11 @@ func agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegis
|
||||||
// on the queue. This happens when service has been added / removed / modified
|
// on the queue. This happens when service has been added / removed / modified
|
||||||
// and implies the Consul agent should be sync'd with nomad, because
|
// and implies the Consul agent should be sync'd with nomad, because
|
||||||
// nomad is the ultimate source of truth for the service definition.
|
// nomad is the ultimate source of truth for the service definition.
|
||||||
|
|
||||||
|
// But do purge tagged address fields of nomad agent services.
|
||||||
|
maybeTweakTaggedAddresses(wanted, existing)
|
||||||
|
|
||||||
|
// Okay now it is safe to compare.
|
||||||
return different(wanted, existing, sidecar)
|
return different(wanted, existing, sidecar)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -197,6 +212,15 @@ func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentSer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maybeTweakTaggedAddresses will remove the .TaggedAddresses fields from existing
|
||||||
|
// if wanted represents a Nomad agent (Client or Server). We do this because Consul
|
||||||
|
// sets the TaggedAddress on these legacy registrations for us
|
||||||
|
func maybeTweakTaggedAddresses(wanted *api.AgentServiceRegistration, existing *api.AgentService) {
|
||||||
|
if isNomadAgent(wanted.ID) && len(wanted.TaggedAddresses) == 0 {
|
||||||
|
existing.TaggedAddresses = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// different compares the wanted state of the service registration with the actual
|
// different compares the wanted state of the service registration with the actual
|
||||||
// (cached) state of the service registration reported by Consul. If any of the
|
// (cached) state of the service registration reported by Consul. If any of the
|
||||||
// critical fields are not deeply equal, they considered different.
|
// critical fields are not deeply equal, they considered different.
|
||||||
|
@ -214,9 +238,9 @@ func different(wanted *api.AgentServiceRegistration, existing *api.AgentService,
|
||||||
return true
|
return true
|
||||||
case wanted.EnableTagOverride != existing.EnableTagOverride:
|
case wanted.EnableTagOverride != existing.EnableTagOverride:
|
||||||
return true
|
return true
|
||||||
case !reflect.DeepEqual(wanted.Meta, existing.Meta):
|
case !maps.Equal(wanted.Meta, existing.Meta):
|
||||||
return true
|
return true
|
||||||
case !reflect.DeepEqual(wanted.TaggedAddresses, existing.TaggedAddresses):
|
case !maps.Equal(wanted.TaggedAddresses, existing.TaggedAddresses):
|
||||||
return true
|
return true
|
||||||
case !helper.SliceSetEq(wanted.Tags, existing.Tags):
|
case !helper.SliceSetEq(wanted.Tags, existing.Tags):
|
||||||
return true
|
return true
|
||||||
|
@ -384,8 +408,8 @@ type ServiceClient struct {
|
||||||
services map[string]*api.AgentServiceRegistration
|
services map[string]*api.AgentServiceRegistration
|
||||||
checks map[string]*api.AgentCheckRegistration
|
checks map[string]*api.AgentCheckRegistration
|
||||||
|
|
||||||
explicitlyDeregisteredServices map[string]bool
|
explicitlyDeregisteredServices *set.Set[string]
|
||||||
explicitlyDeregisteredChecks map[string]bool
|
explicitlyDeregisteredChecks *set.Set[string]
|
||||||
|
|
||||||
// allocRegistrations stores the services and checks that are registered
|
// allocRegistrations stores the services and checks that are registered
|
||||||
// with Consul by allocation ID.
|
// with Consul by allocation ID.
|
||||||
|
@ -394,8 +418,8 @@ type ServiceClient struct {
|
||||||
|
|
||||||
// Nomad agent services and checks that are recorded so they can be removed
|
// Nomad agent services and checks that are recorded so they can be removed
|
||||||
// on shutdown. Defers to consul namespace specified in client consul config.
|
// on shutdown. Defers to consul namespace specified in client consul config.
|
||||||
agentServices map[string]struct{}
|
agentServices *set.Set[string]
|
||||||
agentChecks map[string]struct{}
|
agentChecks *set.Set[string]
|
||||||
agentLock sync.Mutex
|
agentLock sync.Mutex
|
||||||
|
|
||||||
// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
|
// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
|
||||||
|
@ -461,11 +485,11 @@ func NewServiceClient(agentAPI AgentAPI, namespacesClient *NamespacesClient, log
|
||||||
opCh: make(chan *operations, 8),
|
opCh: make(chan *operations, 8),
|
||||||
services: make(map[string]*api.AgentServiceRegistration),
|
services: make(map[string]*api.AgentServiceRegistration),
|
||||||
checks: make(map[string]*api.AgentCheckRegistration),
|
checks: make(map[string]*api.AgentCheckRegistration),
|
||||||
explicitlyDeregisteredServices: make(map[string]bool),
|
explicitlyDeregisteredServices: set.New[string](0),
|
||||||
explicitlyDeregisteredChecks: make(map[string]bool),
|
explicitlyDeregisteredChecks: set.New[string](0),
|
||||||
allocRegistrations: make(map[string]*serviceregistration.AllocRegistration),
|
allocRegistrations: make(map[string]*serviceregistration.AllocRegistration),
|
||||||
agentServices: make(map[string]struct{}),
|
agentServices: set.New[string](4),
|
||||||
agentChecks: make(map[string]struct{}),
|
agentChecks: set.New[string](0),
|
||||||
isClientAgent: isNomadClient,
|
isClientAgent: isNomadClient,
|
||||||
deregisterProbationExpiry: time.Now().Add(deregisterProbationPeriod),
|
deregisterProbationExpiry: time.Now().Add(deregisterProbationPeriod),
|
||||||
checkWatcher: serviceregistration.NewCheckWatcher(logger, &checkStatusGetter{
|
checkWatcher: serviceregistration.NewCheckWatcher(logger, &checkStatusGetter{
|
||||||
|
@ -656,8 +680,8 @@ func (c *ServiceClient) commit(ops *operations) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *ServiceClient) clearExplicitlyDeregistered() {
|
func (c *ServiceClient) clearExplicitlyDeregistered() {
|
||||||
c.explicitlyDeregisteredServices = make(map[string]bool)
|
c.explicitlyDeregisteredServices = set.New[string](0)
|
||||||
c.explicitlyDeregisteredChecks = make(map[string]bool)
|
c.explicitlyDeregisteredChecks = set.New[string](0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge registrations into state map prior to sync'ing with Consul
|
// merge registrations into state map prior to sync'ing with Consul
|
||||||
|
@ -670,11 +694,11 @@ func (c *ServiceClient) merge(ops *operations) {
|
||||||
}
|
}
|
||||||
for _, sid := range ops.deregServices {
|
for _, sid := range ops.deregServices {
|
||||||
delete(c.services, sid)
|
delete(c.services, sid)
|
||||||
c.explicitlyDeregisteredServices[sid] = true
|
c.explicitlyDeregisteredServices.Insert(sid)
|
||||||
}
|
}
|
||||||
for _, cid := range ops.deregChecks {
|
for _, cid := range ops.deregChecks {
|
||||||
delete(c.checks, cid)
|
delete(c.checks, cid)
|
||||||
c.explicitlyDeregisteredChecks[cid] = true
|
c.explicitlyDeregisteredChecks.Insert(cid)
|
||||||
}
|
}
|
||||||
metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
|
metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
|
||||||
metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
|
metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
|
||||||
|
@ -728,7 +752,7 @@ func (c *ServiceClient) sync(reason syncReason) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore unknown services during probation
|
// Ignore unknown services during probation
|
||||||
if inProbation && !c.explicitlyDeregisteredServices[id] {
|
if inProbation && !c.explicitlyDeregisteredServices.Contains(id) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -767,9 +791,8 @@ func (c *ServiceClient) sync(reason syncReason) error {
|
||||||
metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
|
metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Nomad services missing from Consul, or where the service has been updated.
|
// Add Nomad managed services missing in Consul, or updated via Nomad.
|
||||||
for id, serviceInNomad := range c.services {
|
for id, serviceInNomad := range c.services {
|
||||||
|
|
||||||
serviceInConsul, exists := servicesInConsul[id]
|
serviceInConsul, exists := servicesInConsul[id]
|
||||||
sidecarInConsul := getNomadSidecar(id, servicesInConsul)
|
sidecarInConsul := getNomadSidecar(id, servicesInConsul)
|
||||||
|
|
||||||
|
@ -813,7 +836,7 @@ func (c *ServiceClient) sync(reason syncReason) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore unknown services during probation
|
// Ignore unknown services during probation
|
||||||
if inProbation && !c.explicitlyDeregisteredChecks[id] {
|
if inProbation && !c.explicitlyDeregisteredChecks.Contains(id) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -938,10 +961,10 @@ func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service)
|
||||||
|
|
||||||
// Record IDs for deregistering on shutdown
|
// Record IDs for deregistering on shutdown
|
||||||
for _, id := range ops.regServices {
|
for _, id := range ops.regServices {
|
||||||
c.agentServices[id.ID] = struct{}{}
|
c.agentServices.Insert(id.ID)
|
||||||
}
|
}
|
||||||
for _, id := range ops.regChecks {
|
for _, id := range ops.regChecks {
|
||||||
c.agentChecks[id.ID] = struct{}{}
|
c.agentChecks.Insert(id.ID)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1432,7 +1455,7 @@ func (c *ServiceClient) Shutdown() error {
|
||||||
|
|
||||||
// Always attempt to deregister Nomad agent Consul entries, even if
|
// Always attempt to deregister Nomad agent Consul entries, even if
|
||||||
// deadline was reached
|
// deadline was reached
|
||||||
for id := range c.agentServices {
|
for _, id := range c.agentServices.List() {
|
||||||
if err := c.agentAPI.ServiceDeregisterOpts(id, nil); err != nil {
|
if err := c.agentAPI.ServiceDeregisterOpts(id, nil); err != nil {
|
||||||
c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
|
c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
|
||||||
}
|
}
|
||||||
|
@ -1463,7 +1486,7 @@ func (c *ServiceClient) Shutdown() error {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
for id := range c.agentChecks {
|
for _, id := range c.agentChecks.List() {
|
||||||
// if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway
|
// if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway
|
||||||
// if we could list the remaining checks, verify that the check we store still exists before removing it.
|
// if we could list the remaining checks, verify that the check we store still exists before removing it.
|
||||||
if remainingChecks == nil || checkRemains(id) {
|
if remainingChecks == nil || checkRemains(id) {
|
||||||
|
@ -1595,10 +1618,19 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
|
||||||
return &chkReg, nil
|
return &chkReg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// isNomadCheck returns true if the ID matches the pattern of a Nomad managed
|
// isNomadClient returns true if id represents a Nomad Client registration.
|
||||||
// check.
|
func isNomadClient(id string) bool {
|
||||||
func isNomadCheck(id string) bool {
|
return strings.HasPrefix(id, nomadClientPrefix)
|
||||||
return strings.HasPrefix(id, nomadCheckPrefix)
|
}
|
||||||
|
|
||||||
|
// isNomadServer returns true if id represents a Nomad Server registration.
|
||||||
|
func isNomadServer(id string) bool {
|
||||||
|
return strings.HasPrefix(id, nomadServerPrefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isNomadAgent returns true if id represents a Nomad Client or Server registration.
|
||||||
|
func isNomadAgent(id string) bool {
|
||||||
|
return isNomadClient(id) || isNomadServer(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// isNomadService returns true if the ID matches the pattern of a Nomad managed
|
// isNomadService returns true if the ID matches the pattern of a Nomad managed
|
||||||
|
@ -1608,6 +1640,12 @@ func isNomadService(id string) bool {
|
||||||
return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
|
return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isNomadCheck returns true if the ID matches the pattern of a Nomad managed
|
||||||
|
// check.
|
||||||
|
func isNomadCheck(id string) bool {
|
||||||
|
return strings.HasPrefix(id, nomadCheckPrefix)
|
||||||
|
}
|
||||||
|
|
||||||
// isOldNomadService returns true if the ID matches an old pattern managed by
|
// isOldNomadService returns true if the ID matches an old pattern managed by
|
||||||
// Nomad.
|
// Nomad.
|
||||||
//
|
//
|
||||||
|
|
Loading…
Reference in New Issue