Remove unnecessary locking and serverlist syncing in heartbeats

This removes an unnecessary shared lock between discovery and heartbeating
which was causing heartbeats to be missed upon retries when a single server
failed. Also made a drive-by fix to start the periodic server shuffler goroutine.
This commit is contained in:
Preetha Appan 2019-05-06 14:44:55 -05:00
parent 9f3f11df97
commit b063fc81a4
No known key found for this signature in database
GPG key ID: 9F7C19990A50EAFC
4 changed files with 30 additions and 9 deletions

View file

@ -315,6 +315,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
// Initialize the server manager
c.servers = servers.New(c.logger, c.shutdownCh, c)
// Start the server manager's periodic rebalancing goroutine
go c.servers.Start()
// Initialize the client
if err := c.init(); err != nil {
return nil, fmt.Errorf("failed to initialize client: %v", err)
@ -1345,7 +1348,6 @@ func (c *Client) registerAndHeartbeat() {
case <-c.shutdownCh:
return
}
if err := c.updateNodeStatus(); err != nil {
// The servers have changed such that this node has not been
// registered before
@ -2342,13 +2344,6 @@ func (c *Client) consulDiscovery() {
func (c *Client) consulDiscoveryImpl() error {
consulLogger := c.logger.Named("consul")
// Acquire heartbeat lock to prevent heartbeat from running
// concurrently with discovery. Concurrent execution is safe, however
// discovery is usually triggered when heartbeating has failed so
// there's no point in allowing it.
c.heartbeatLock.Lock()
defer c.heartbeatLock.Unlock()
dcs, err := c.consulCatalog.Datacenters()
if err != nil {
return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
@ -2432,6 +2427,26 @@ DISCOLOOP:
consulLogger.Info("discovered following servers", "servers", nomadServers)
// Check if the list of servers discovered is identical to the list we already have
// If so, we don't need to reset the server list unnecessarily
knownServers := make(map[string]struct{})
serverList := c.servers.GetServers()
for _, s := range serverList {
knownServers[s.Addr.String()] = struct{}{}
}
allFound := true
for _, s := range nomadServers {
_, known := knownServers[s.Addr.String()]
if !known {
allFound = false
break
}
}
if allFound && len(nomadServers) == len(serverList) {
c.logger.Info("Not replacing server list, current server list is identical to servers discovered in Consul")
return nil
}
// Fire the retry trigger if we have updated the set of servers.
if c.servers.SetServers(nomadServers) {
// Start rebalancing

View file

@ -7,6 +7,9 @@ data_dir = "/tmp/client1"
# Give the agent a unique name. Defaults to hostname
name = "client1"
# Enable debugging
enable_debug = true
# Enable the client
client {
enabled = true

View file

@ -7,6 +7,9 @@ data_dir = "/tmp/client2"
# Give the agent a unique name. Defaults to hostname
name = "client2"
# Enable debugging
enable_debug = true
# Enable the client
client {
enabled = true

View file

@ -16,5 +16,5 @@ server {
}
# Self-elect, should be 3 or 5 for production
bootstrap_expect = 3
bootstrap_expect = 1
}