Remove unnecessary locking and serverlist syncing in heartbeats

This removes an unnecessary shared lock between discovery and heartbeating
which was causing heartbeats to be missed upon retries when a single server
fails. Also made a drive by fix to call the periodic server shuffler goroutine.
This commit is contained in:
Preetha Appan 2019-05-06 14:44:55 -05:00
parent 9f3f11df97
commit b063fc81a4
No known key found for this signature in database
GPG Key ID: 9F7C19990A50EAFC
4 changed files with 30 additions and 9 deletions

View File

@ -315,6 +315,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
// Initialize the server manager // Initialize the server manager
c.servers = servers.New(c.logger, c.shutdownCh, c) c.servers = servers.New(c.logger, c.shutdownCh, c)
// Start server manager rebalancing go routine
go c.servers.Start()
// Initialize the client // Initialize the client
if err := c.init(); err != nil { if err := c.init(); err != nil {
return nil, fmt.Errorf("failed to initialize client: %v", err) return nil, fmt.Errorf("failed to initialize client: %v", err)
@ -1345,7 +1348,6 @@ func (c *Client) registerAndHeartbeat() {
case <-c.shutdownCh: case <-c.shutdownCh:
return return
} }
if err := c.updateNodeStatus(); err != nil { if err := c.updateNodeStatus(); err != nil {
// The servers have changed such that this node has not been // The servers have changed such that this node has not been
// registered before // registered before
@ -2342,13 +2344,6 @@ func (c *Client) consulDiscovery() {
func (c *Client) consulDiscoveryImpl() error { func (c *Client) consulDiscoveryImpl() error {
consulLogger := c.logger.Named("consul") consulLogger := c.logger.Named("consul")
// Acquire heartbeat lock to prevent heartbeat from running
// concurrently with discovery. Concurrent execution is safe, however
// discovery is usually triggered when heartbeating has failed so
// there's no point in allowing it.
c.heartbeatLock.Lock()
defer c.heartbeatLock.Unlock()
dcs, err := c.consulCatalog.Datacenters() dcs, err := c.consulCatalog.Datacenters()
if err != nil { if err != nil {
return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err) return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
@ -2432,6 +2427,26 @@ DISCOLOOP:
consulLogger.Info("discovered following servers", "servers", nomadServers) consulLogger.Info("discovered following servers", "servers", nomadServers)
// Check if the list of servers discovered is identical to the list we already have
// If so, we don't need to reset the server list unnecessarily
knownServers := make(map[string]struct{})
serverList := c.servers.GetServers()
for _, s := range serverList {
knownServers[s.Addr.String()] = struct{}{}
}
allFound := true
for _, s := range nomadServers {
_, known := knownServers[s.Addr.String()]
if !known {
allFound = false
break
}
}
if allFound && len(nomadServers) == len(serverList) {
c.logger.Info("Not replacing server list, current server list is identical to servers discovered in Consul")
return nil
}
// Fire the retry trigger if we have updated the set of servers. // Fire the retry trigger if we have updated the set of servers.
if c.servers.SetServers(nomadServers) { if c.servers.SetServers(nomadServers) {
// Start rebalancing // Start rebalancing

View File

@ -7,6 +7,9 @@ data_dir = "/tmp/client1"
# Give the agent a unique name. Defaults to hostname # Give the agent a unique name. Defaults to hostname
name = "client1" name = "client1"
# Enable debugging
enable_debug = true
# Enable the client # Enable the client
client { client {
enabled = true enabled = true

View File

@ -7,6 +7,9 @@ data_dir = "/tmp/client2"
# Give the agent a unique name. Defaults to hostname # Give the agent a unique name. Defaults to hostname
name = "client2" name = "client2"
# Enable debugging
enable_debug = true
# Enable the client # Enable the client
client { client {
enabled = true enabled = true

View File

@ -16,5 +16,5 @@ server {
} }
# Self-elect, should be 3 or 5 for production # Self-elect, should be 3 or 5 for production
bootstrap_expect = 3 bootstrap_expect = 1
} }