Remove unnecessary locking and serverlist syncing in heartbeats
This removes an unnecessary shared lock between discovery and heartbeating which was causing heartbeats to be missed upon retries when a single server fails. Also made a drive by fix to call the periodic server shuffler goroutine.
This commit is contained in:
parent
9f3f11df97
commit
b063fc81a4
|
@ -315,6 +315,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
|
||||||
// Initialize the server manager
|
// Initialize the server manager
|
||||||
c.servers = servers.New(c.logger, c.shutdownCh, c)
|
c.servers = servers.New(c.logger, c.shutdownCh, c)
|
||||||
|
|
||||||
|
// Start server manager rebalancing go routine
|
||||||
|
go c.servers.Start()
|
||||||
|
|
||||||
// Initialize the client
|
// Initialize the client
|
||||||
if err := c.init(); err != nil {
|
if err := c.init(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to initialize client: %v", err)
|
return nil, fmt.Errorf("failed to initialize client: %v", err)
|
||||||
|
@ -1345,7 +1348,6 @@ func (c *Client) registerAndHeartbeat() {
|
||||||
case <-c.shutdownCh:
|
case <-c.shutdownCh:
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := c.updateNodeStatus(); err != nil {
|
if err := c.updateNodeStatus(); err != nil {
|
||||||
// The servers have changed such that this node has not been
|
// The servers have changed such that this node has not been
|
||||||
// registered before
|
// registered before
|
||||||
|
@ -2342,13 +2344,6 @@ func (c *Client) consulDiscovery() {
|
||||||
func (c *Client) consulDiscoveryImpl() error {
|
func (c *Client) consulDiscoveryImpl() error {
|
||||||
consulLogger := c.logger.Named("consul")
|
consulLogger := c.logger.Named("consul")
|
||||||
|
|
||||||
// Acquire heartbeat lock to prevent heartbeat from running
|
|
||||||
// concurrently with discovery. Concurrent execution is safe, however
|
|
||||||
// discovery is usually triggered when heartbeating has failed so
|
|
||||||
// there's no point in allowing it.
|
|
||||||
c.heartbeatLock.Lock()
|
|
||||||
defer c.heartbeatLock.Unlock()
|
|
||||||
|
|
||||||
dcs, err := c.consulCatalog.Datacenters()
|
dcs, err := c.consulCatalog.Datacenters()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
|
return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
|
||||||
|
@ -2432,6 +2427,26 @@ DISCOLOOP:
|
||||||
|
|
||||||
consulLogger.Info("discovered following servers", "servers", nomadServers)
|
consulLogger.Info("discovered following servers", "servers", nomadServers)
|
||||||
|
|
||||||
|
// Check if the list of servers discovered is identical to the list we already have
|
||||||
|
// If so, we don't need to reset the server list unnecessarily
|
||||||
|
knownServers := make(map[string]struct{})
|
||||||
|
serverList := c.servers.GetServers()
|
||||||
|
for _, s := range serverList {
|
||||||
|
knownServers[s.Addr.String()] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
allFound := true
|
||||||
|
for _, s := range nomadServers {
|
||||||
|
_, known := knownServers[s.Addr.String()]
|
||||||
|
if !known {
|
||||||
|
allFound = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if allFound && len(nomadServers) == len(serverList) {
|
||||||
|
c.logger.Info("Not replacing server list, current server list is identical to servers discovered in Consul")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
// Fire the retry trigger if we have updated the set of servers.
|
// Fire the retry trigger if we have updated the set of servers.
|
||||||
if c.servers.SetServers(nomadServers) {
|
if c.servers.SetServers(nomadServers) {
|
||||||
// Start rebalancing
|
// Start rebalancing
|
||||||
|
|
|
@ -7,6 +7,9 @@ data_dir = "/tmp/client1"
|
||||||
# Give the agent a unique name. Defaults to hostname
|
# Give the agent a unique name. Defaults to hostname
|
||||||
name = "client1"
|
name = "client1"
|
||||||
|
|
||||||
|
# Enable debugging
|
||||||
|
enable_debug = true
|
||||||
|
|
||||||
# Enable the client
|
# Enable the client
|
||||||
client {
|
client {
|
||||||
enabled = true
|
enabled = true
|
||||||
|
|
|
@ -7,6 +7,9 @@ data_dir = "/tmp/client2"
|
||||||
# Give the agent a unique name. Defaults to hostname
|
# Give the agent a unique name. Defaults to hostname
|
||||||
name = "client2"
|
name = "client2"
|
||||||
|
|
||||||
|
# Enable debugging
|
||||||
|
enable_debug = true
|
||||||
|
|
||||||
# Enable the client
|
# Enable the client
|
||||||
client {
|
client {
|
||||||
enabled = true
|
enabled = true
|
||||||
|
|
|
@ -16,5 +16,5 @@ server {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Self-elect, should be 3 or 5 for production
|
# Self-elect, should be 3 or 5 for production
|
||||||
bootstrap_expect = 3
|
bootstrap_expect = 1
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue