From 777504ff0e3534bea8e806518e9f8e361ffe22ee Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Wed, 5 Jul 2017 17:09:18 -0500 Subject: [PATCH] Fixes deadlock between barrier write and leader notify channel read. Fixes #3230 --- agent/consul/leader.go | 9 +++++---- agent/consul/server.go | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 431aacc77..e0c8af548 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -25,6 +25,7 @@ const ( ConsulServiceID = "consul" ConsulServiceName = "consul" newLeaderEvent = "consul:new-leader" + barrierWriteTimeout = 2 * time.Minute ) // monitorLeadership is used to monitor if we acquire or lose our role @@ -35,13 +36,13 @@ func (s *Server) monitorLeadership() { // leaderCh, which is only notified best-effort. Doing this ensures // that we get all notifications in order, which is required for // cleanup and to ensure we never run multiple leader loops. - leaderCh := s.leaderCh + raftNotifyCh := s.raftNotifyChanel var wg sync.WaitGroup var stopCh chan struct{} for { select { - case isLeader := <-leaderCh: + case isLeader := <-raftNotifyCh: if isLeader { stopCh = make(chan struct{}) wg.Add(1) @@ -96,10 +97,10 @@ RECONCILE: // Apply a raft barrier to ensure our FSM is caught up start := time.Now() - barrier := s.raft.Barrier(0) + barrier := s.raft.Barrier(barrierWriteTimeout) if err := barrier.Error(); err != nil { s.logger.Printf("[ERR] consul: failed to wait for barrier: %v", err) - goto WAIT + return } metrics.MeasureSince([]string{"consul", "leader", "barrier"}, start) diff --git a/agent/consul/server.go b/agent/consul/server.go index fbe19564b..64ea443f0 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -134,9 +134,9 @@ type Server struct { raftTransport *raft.NetworkTransport raftInmem *raft.InmemStore - // leaderCh set up by setupRaft() and ensures that we get reliable leader + // raftNotifyChanel set up by
setupRaft() and ensures that we get reliable leader // transition notifications from the Raft layer. - leaderCh <-chan bool + raftNotifyChanel <-chan bool // reconcileCh is used to pass events from the serf handler // into the leader manager, so that the strong state can be @@ -601,9 +601,9 @@ func (s *Server) setupRaft() error { } // Set up a channel for reliable leader notifications. - leaderCh := make(chan bool, 1) - s.config.RaftConfig.NotifyCh = leaderCh - s.leaderCh = leaderCh + raftReconcileChannel := make(chan bool, 1) + s.config.RaftConfig.NotifyCh = raftReconcileChannel + s.raftNotifyChanel = raftReconcileChannel // Setup the Raft store. s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)