From 2949980a64f64421b42d7f067ee75a9d145f9284 Mon Sep 17 00:00:00 2001 From: Sean Chittenden Date: Thu, 18 Feb 2016 17:46:02 -0800 Subject: [PATCH] Warn if serf events have queued up past 80% of the limit It is theoretically possible for the number of queued serf events to back up. When that happens, emit a warning message whenever there are more than 200 events in the queue (the queue holds at most 256). Most notably, this can happen if `c.consulServerLock` is held for an "extended period of time". The probability of anyone ever seeing this log message is hopefully low to nonexistent, but if it happens, a warning message indicating that a large number of serf events fired while a lock was held is likely to be more helpful than serf mysteriously blocking when attempting to add an event to a channel. --- consul/client.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/consul/client.go b/consul/client.go index 1596f1e5f..d6d76f3b2 100644 --- a/consul/client.go +++ b/consul/client.go @@ -24,6 +24,16 @@ const ( // clientMaxStreams controls how many idle streams we keep // open to a server clientMaxStreams = 32 + + // serfEventBacklog is the maximum number of unprocessed Serf Events + // that will be held in queue before new serf events block. A + // blocking serf event queue is a bad thing. + serfEventBacklog = 256 + + // serfEventBacklogWarning is the threshold at which point log + // warnings will be emitted indicating a problem when processing serf + // events. 
+ serfEventBacklogWarning = 200 ) // Interface is used to provide either a Client or Server, @@ -102,8 +112,8 @@ func NewClient(config *Config) (*Client, error) { // Create server c := &Client{ config: config, - connPool: NewPool(config.LogOutput, clientRPCCache, clientMaxStreams, tlsWrap), - eventCh: make(chan serf.Event, 256), + connPool: NewPool(config.LogOutput, clientRPCConnMaxIdle, clientMaxStreams, tlsWrap), + eventCh: make(chan serf.Event, serfEventBacklog), logger: logger, shutdownCh: make(chan struct{}), } @@ -214,7 +224,13 @@ func (c *Client) Encrypted() bool { // lanEventHandler is used to handle events from the lan Serf cluster func (c *Client) lanEventHandler() { + var numQueuedEvents int for { + numQueuedEvents = len(c.eventCh) + if numQueuedEvents > serfEventBacklogWarning { + c.logger.Printf("[WARN] consul: number of queued serf events above warning threshold: %d/%d", numQueuedEvents, serfEventBacklogWarning) + } + select { case e := <-c.eventCh: switch e.EventType() {