Remove unnecessary locking and serverlist syncing in heartbeats
This removes an unnecessary shared lock between discovery and heartbeating, which was causing heartbeats to be missed on retries when a single server fails. Also includes a drive-by fix to actually start the periodic server shuffler goroutine.
parent 9f3f11df97
commit b063fc81a4
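To make the failure mode concrete: when discovery and heartbeating share a mutex, a slow discovery pass (for example, walking Consul datacenters while a server is down) holds the lock for its whole run, and the heartbeat retry sits behind it until the lock is released. The following is a minimal, self-contained sketch of that contention; it is not code from this change, and the names and timings are illustrative only.

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var shared sync.Mutex // stands in for the old heartbeatLock shared by both paths

	// "Discovery": grabs the shared lock and holds it while it slowly
	// looks for a live server.
	go func() {
		shared.Lock()
		defer shared.Unlock()
		time.Sleep(2 * time.Second) // simulate slow Consul queries while a server is down
	}()

	time.Sleep(100 * time.Millisecond) // let discovery win the lock first

	// "Heartbeat retry": must wait for discovery to finish before it can
	// even attempt to heartbeat again, so the retry deadline can slip.
	start := time.Now()
	shared.Lock()
	fmt.Printf("heartbeat retry waited %v for the shared lock\n", time.Since(start))
	shared.Unlock()
}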
@@ -315,6 +315,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
 	// Initialize the server manager
 	c.servers = servers.New(c.logger, c.shutdownCh, c)
 
+	// Start server manager rebalancing go routine
+	go c.servers.Start()
+
 	// Initialize the client
 	if err := c.init(); err != nil {
 		return nil, fmt.Errorf("failed to initialize client: %v", err)
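The body of c.servers.Start() is not part of this diff. As a rough sketch of the shape such a periodic shuffler usually takes (a ticker-driven loop that reshuffles the known servers until shutdown), here is an illustrative stand-alone version; the type, interval, and shuffle policy are assumptions, not the actual servers package implementation:

package main

import (
	"math/rand"
	"sync"
	"time"
)

// manager is an illustrative stand-in for a client-side server list manager.
type manager struct {
	mu         sync.Mutex
	servers    []string
	shutdownCh chan struct{}
}

// Start runs until shutdown, periodically shuffling the server list so that
// client RPC load spreads across servers instead of pinning to the first one.
func (m *manager) Start() {
	ticker := time.NewTicker(5 * time.Minute) // interval is an assumption
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			m.mu.Lock()
			rand.Shuffle(len(m.servers), func(i, j int) {
				m.servers[i], m.servers[j] = m.servers[j], m.servers[i]
			})
			m.mu.Unlock()
		case <-m.shutdownCh:
			return
		}
	}
}

func main() {
	m := &manager{
		servers:    []string{"10.0.0.1:4647", "10.0.0.2:4647", "10.0.0.3:4647"},
		shutdownCh: make(chan struct{}),
	}
	go m.Start() // mirrors the `go c.servers.Start()` added above
	time.Sleep(100 * time.Millisecond)
	close(m.shutdownCh)
}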
@@ -1345,7 +1348,6 @@ func (c *Client) registerAndHeartbeat() {
 		case <-c.shutdownCh:
 			return
 		}
 
 		if err := c.updateNodeStatus(); err != nil {
 			// The servers have changed such that this node has not been
 			// registered before
@@ -2342,13 +2344,6 @@ func (c *Client) consulDiscovery() {
 func (c *Client) consulDiscoveryImpl() error {
 	consulLogger := c.logger.Named("consul")
 
-	// Acquire heartbeat lock to prevent heartbeat from running
-	// concurrently with discovery. Concurrent execution is safe, however
-	// discovery is usually triggered when heartbeating has failed so
-	// there's no point in allowing it.
-	c.heartbeatLock.Lock()
-	defer c.heartbeatLock.Unlock()
-
 	dcs, err := c.consulCatalog.Datacenters()
 	if err != nil {
 		return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
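With the shared lock gone, the heartbeat path can still ask for discovery without ever blocking on it. A common way to wire that up is a non-blocking send on a buffered trigger channel that the discovery loop drains; the sketch below is hedged, with the channel name and plumbing as assumptions rather than the client's actual fields:

package main

import (
	"fmt"
	"time"
)

// triggerDiscoveryCh is an illustrative stand-in for a client-owned channel;
// capacity 1 coalesces repeated triggers while a discovery pass is running.
var triggerDiscoveryCh = make(chan struct{}, 1)

// triggerDiscovery never blocks: if a trigger is already pending, the
// heartbeat path just moves on and retries on its own schedule.
func triggerDiscovery() {
	select {
	case triggerDiscoveryCh <- struct{}{}:
	default:
	}
}

func main() {
	// Discovery loop: runs a pass whenever it is poked.
	go func() {
		for range triggerDiscoveryCh {
			fmt.Println("running consul discovery pass")
			time.Sleep(50 * time.Millisecond) // simulate a discovery pass
		}
	}()

	// Heartbeat path: fire-and-forget triggers, no lock shared with discovery.
	for i := 0; i < 3; i++ {
		triggerDiscovery()
	}
	time.Sleep(200 * time.Millisecond)
}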
@@ -2432,6 +2427,26 @@ DISCOLOOP:
 
 	consulLogger.Info("discovered following servers", "servers", nomadServers)
 
+	// Check if the list of servers discovered is identical to the list we already have
+	// If so, we don't need to reset the server list unnecessarily
+	knownServers := make(map[string]struct{})
+	serverList := c.servers.GetServers()
+	for _, s := range serverList {
+		knownServers[s.Addr.String()] = struct{}{}
+	}
+
+	allFound := true
+	for _, s := range nomadServers {
+		_, known := knownServers[s.Addr.String()]
+		if !known {
+			allFound = false
+			break
+		}
+	}
+	if allFound && len(nomadServers) == len(serverList) {
+		c.logger.Info("Not replacing server list, current server list is identical to servers discovered in Consul")
+		return nil
+	}
 	// Fire the retry trigger if we have updated the set of servers.
 	if c.servers.SetServers(nomadServers) {
 		// Start rebalancing
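The added guard is a plain set comparison on server addresses: a membership check in one direction plus a length check, so reordering is ignored but any added or dropped server still counts as a change. Below is a self-contained sketch of the same technique; the helper name and address strings are illustrative, not from the client:

package main

import "fmt"

// sameServerSet reports whether two address lists contain exactly the same
// members, ignoring order. It mirrors the discovery check above: build a set
// from the known list, require every discovered address to be present, and
// require equal lengths so a shrunken list still counts as a change.
func sameServerSet(known, discovered []string) bool {
	if len(known) != len(discovered) {
		return false
	}
	set := make(map[string]struct{}, len(known))
	for _, addr := range known {
		set[addr] = struct{}{}
	}
	for _, addr := range discovered {
		if _, ok := set[addr]; !ok {
			return false
		}
	}
	return true
}

func main() {
	current := []string{"10.0.0.1:4647", "10.0.0.2:4647", "10.0.0.3:4647"}
	// true: same members, different order, so the server list is left alone.
	fmt.Println(sameServerSet(current, []string{"10.0.0.3:4647", "10.0.0.1:4647", "10.0.0.2:4647"}))
	// false: membership differs, so SetServers would be called.
	fmt.Println(sameServerSet(current, []string{"10.0.0.1:4647", "10.0.0.4:4647", "10.0.0.2:4647"}))
}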
@@ -7,6 +7,9 @@ data_dir = "/tmp/client1"
 # Give the agent a unique name. Defaults to hostname
 name = "client1"
 
+# Enable debugging
+enable_debug = true
+
 # Enable the client
 client {
   enabled = true
@@ -7,6 +7,9 @@ data_dir = "/tmp/client2"
 # Give the agent a unique name. Defaults to hostname
 name = "client2"
 
+# Enable debugging
+enable_debug = true
+
 # Enable the client
 client {
   enabled = true
@@ -16,5 +16,5 @@ server {
 }
 
 # Self-elect, should be 3 or 5 for production
-bootstrap_expect = 3
+bootstrap_expect = 1
 }