Add async reconciliation controller subpackage (#15534)

* Add async reconciliation controller subpackage
* Address initial feedback
* Add tests for panic assertions
* Fix comment

This commit is contained in:
parent a1ceeff461
commit 1ff0906a3e
@@ -0,0 +1,289 @@
package controller

import (
	"context"
	"errors"
	"fmt"
	"sync/atomic"
	"time"

	"github.com/hashicorp/consul/agent/consul/state"
	"github.com/hashicorp/consul/agent/consul/stream"
	"github.com/hashicorp/consul/agent/structs"
	"golang.org/x/sync/errgroup"
)

// much of this is a re-implementation of
// https://github.com/kubernetes-sigs/controller-runtime/blob/release-0.13/pkg/internal/controller/controller.go

// Transformer is a function that takes one type of config entry that has changed
// and transforms that into a set of reconciliation requests to enqueue.
type Transformer func(entry structs.ConfigEntry) []Request
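
// As an illustrative sketch (the gateway name below is hypothetical), a
// Transformer can fan a dependency's change out into requests for the
// resources that depend on it:
//
//	func gatewayTransformer(entry structs.ConfigEntry) []Request {
//		// whenever the watched entry changes, re-reconcile the
//		// dependent ingress gateway rather than the entry itself
//		return []Request{{
//			Kind: structs.IngressGateway,
//			Name: "my-gateway", // hypothetical dependent resource
//			Meta: entry.GetEnterpriseMeta(),
//		}}
//	}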

// Controller subscribes to a set of watched resources from the
// state store and delegates processing them to a given Reconciler.
// If a Reconciler errors while processing a Request, then the
// Controller handles rescheduling the Request to be re-processed.
type Controller interface {
	// Run begins the Controller's main processing loop. When the given
	// context is canceled, the Controller stops processing any remaining work.
	// The Run function should only ever be called once.
	Run(ctx context.Context) error
	// Subscribe tells the controller to subscribe to updates for config entries based
	// on the given request. Optional transformation functions can also be passed in
	// to Subscribe, allowing a controller to map a config entry to a different type of
	// request under the hood (e.g. watching a dependency and triggering a Reconcile on
	// the dependent resource). This should only ever be called prior to calling Run.
	Subscribe(request *stream.SubscribeRequest, transformers ...Transformer) Controller
	// WithBackoff changes the base and maximum backoff values for the Controller's
	// Request retry rate limiter. This should only ever be called prior to
	// calling Run.
	WithBackoff(base, max time.Duration) Controller
	// WithWorkers sets the number of worker goroutines used to process the queue;
	// this defaults to 1 goroutine.
	WithWorkers(i int) Controller
	// WithQueueFactory allows a Controller to replace its underlying work queue
	// implementation. This is most useful for testing. This should only ever be called
	// prior to calling Run.
	WithQueueFactory(fn func(ctx context.Context, baseBackoff time.Duration, maxBackoff time.Duration) WorkQueue) Controller
}
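
// As a usage sketch (mirroring the tests below), a caller constructs a
// Controller, subscribes it to a topic, tunes it, and then runs it; the
// worker count and backoff values here are arbitrary:
//
//	func runGatewayController(ctx context.Context, publisher state.EventPublisher, reconciler Reconciler) error {
//		return New(publisher, reconciler).
//			Subscribe(&stream.SubscribeRequest{
//				Topic:   state.EventTopicIngressGateway,
//				Subject: stream.SubjectWildcard,
//			}).
//			WithWorkers(4).
//			WithBackoff(5*time.Millisecond, 1*time.Second).
//			Run(ctx)
//	}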

var _ Controller = &controller{}

type subscription struct {
	request      *stream.SubscribeRequest
	transformers []Transformer
}

// controller implements the Controller interface
type controller struct {
	// reconciler is the Reconciler that processes all subscribed
	// Requests
	reconciler Reconciler

	// makeQueue is the factory used for creating the work queue; generally
	// this shouldn't be touched, but it can be replaced for testing purposes
	makeQueue func(ctx context.Context, baseBackoff time.Duration, maxBackoff time.Duration) WorkQueue
	// workers is the number of workers to use to process data
	workers int
	// work is the internal work queue that pending Requests are added to
	work WorkQueue
	// baseBackoff is the starting backoff time for the work queue's rate limiter
	baseBackoff time.Duration
	// maxBackoff is the maximum backoff time for the work queue's rate limiter
	maxBackoff time.Duration

	// subscriptions is a list of subscription requests for retrieving configuration entries
	subscriptions []subscription
	// publisher is the event publisher that should be subscribed to for any updates
	publisher state.EventPublisher

	// running ensures that we are only calling Run a single time
	running int32
}

// New returns a new Controller associated with the given event publisher and reconciler.
func New(publisher state.EventPublisher, reconciler Reconciler) Controller {
	return &controller{
		reconciler:  reconciler,
		publisher:   publisher,
		workers:     1,
		baseBackoff: 5 * time.Millisecond,
		maxBackoff:  1000 * time.Second,
		makeQueue:   RunWorkQueue,
	}
}

// Subscribe tells the controller to subscribe to updates for config entries of the
// given kind and with the associated enterprise metadata. This should only ever be
// called prior to calling Run.
func (c *controller) Subscribe(request *stream.SubscribeRequest, transformers ...Transformer) Controller {
	c.ensureNotRunning()

	c.subscriptions = append(c.subscriptions, subscription{
		request:      request,
		transformers: transformers,
	})
	return c
}

// WithBackoff changes the base and maximum backoff values for the Controller's
// Request retry rate limiter. This should only ever be called prior to
// calling Run.
func (c *controller) WithBackoff(base, max time.Duration) Controller {
	c.ensureNotRunning()

	c.baseBackoff = base
	c.maxBackoff = max
	return c
}

// WithWorkers sets the number of worker goroutines used to process the queue;
// this defaults to 1 goroutine.
func (c *controller) WithWorkers(i int) Controller {
	c.ensureNotRunning()

	if i <= 0 {
		i = 1
	}
	c.workers = i
	return c
}

// WithQueueFactory changes the initialization method for the Controller's work
// queue; this is predominantly used for testing. This should only ever be called
// prior to calling Run.
func (c *controller) WithQueueFactory(fn func(ctx context.Context, baseBackoff time.Duration, maxBackoff time.Duration) WorkQueue) Controller {
	c.ensureNotRunning()

	c.makeQueue = fn
	return c
}

// ensureNotRunning makes sure we aren't trying to reconfigure an already
// running controller; it panics if Run has already been invoked.
func (c *controller) ensureNotRunning() {
	if atomic.LoadInt32(&c.running) == 1 {
		panic("cannot configure controller once Run is called")
	}
}

// Run begins the Controller's main processing loop. When the given
// context is canceled, the Controller stops processing any remaining work.
// The Run function should only ever be called once; calling it multiple
// times will result in a panic.
func (c *controller) Run(ctx context.Context) error {
	if !atomic.CompareAndSwapInt32(&c.running, 0, 1) {
		panic("Run cannot be called more than once")
	}

	group, groupCtx := errgroup.WithContext(ctx)

	// set up our queue
	c.work = c.makeQueue(groupCtx, c.baseBackoff, c.maxBackoff)

	for _, sub := range c.subscriptions {
		// store a reference for the closure
		sub := sub
		group.Go(func() error {
			var index uint64

			subscription, err := c.publisher.Subscribe(sub.request)
			if err != nil {
				return err
			}
			defer subscription.Unsubscribe()

			for {
				// use groupCtx so this loop also stops when a sibling
				// goroutine in the group returns an error
				event, err := subscription.Next(groupCtx)
				switch {
				case errors.Is(err, context.Canceled):
					return nil
				case err != nil:
					return err
				}

				if event.IsFramingEvent() {
					continue
				}

				if event.Index <= index {
					continue
				}

				index = event.Index

				if err := c.processEvent(sub, event); err != nil {
					return err
				}
			}
		})
	}

	for i := 0; i < c.workers; i++ {
		group.Go(func() error {
			for {
				request, shutdown := c.work.Get()
				if shutdown {
					// Stop working
					return nil
				}
				c.reconcileHandler(groupCtx, request)
				// Done is called here because it is required to be called
				// when we've finished processing each request
				c.work.Done(request)
			}
		})
	}

	<-groupCtx.Done()
	return nil
}

func (c *controller) processEvent(sub subscription, event stream.Event) error {
	switch payload := event.Payload.(type) {
	case state.EventPayloadConfigEntry:
		c.enqueueEntry(payload.Value, sub.transformers...)
		return nil
	case *stream.PayloadEvents:
		for _, event := range payload.Items {
			if err := c.processEvent(sub, event); err != nil {
				return err
			}
		}
		return nil
	default:
		return fmt.Errorf("unhandled event type: %T", payload)
	}
}

// enqueueEntry adds the given entry to the work queue. If given
// one or more transformation functions, it instead enqueues all of the
// reconciliation requests returned by each Transformer.
func (c *controller) enqueueEntry(entry structs.ConfigEntry, transformers ...Transformer) {
	if len(transformers) == 0 {
		c.work.Add(Request{
			Kind: entry.GetKind(),
			Name: entry.GetName(),
			Meta: entry.GetEnterpriseMeta(),
		})
	} else {
		for _, fn := range transformers {
			for _, request := range fn(entry) {
				c.work.Add(request)
			}
		}
	}
}

// reconcile wraps the reconciler in a panic handler
func (c *controller) reconcile(ctx context.Context, req Request) (err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic [recovered]: %v", r)
			return
		}
	}()
	return c.reconciler.Reconcile(ctx, req)
}

// reconcileHandler invokes the reconciler and looks at its return value
// to determine whether the request should be rescheduled
func (c *controller) reconcileHandler(ctx context.Context, req Request) {
	if err := c.reconcile(ctx, req); err != nil {
		// handle the case where we're specifically told to requeue later
		var requeueAfter RequeueAfterError
		if errors.As(err, &requeueAfter) {
			c.work.Forget(req)
			c.work.AddAfter(req, time.Duration(requeueAfter))
			return
		}

		// fall back to rate-limiting ourselves
		c.work.AddRateLimited(req)
		return
	}

	// if no error then Forget this request so it is not retried
	c.work.Forget(req)
}
@@ -0,0 +1,273 @@
package controller

import (
	"context"
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/consul/agent/consul/fsm"
	"github.com/hashicorp/consul/agent/consul/state"
	"github.com/hashicorp/consul/agent/consul/stream"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/go-hclog"
	"github.com/stretchr/testify/require"
)

func TestBasicController(t *testing.T) {
	t.Parallel()

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	reconciler := newTestReconciler(false)

	publisher := stream.NewEventPublisher(1 * time.Millisecond)
	go publisher.Run(ctx)

	// get the store through the FSM since the publisher handlers get registered through it
	store := fsm.NewFromDeps(fsm.Deps{
		Logger: hclog.New(nil),
		NewStateStore: func() *state.Store {
			return state.NewStateStoreWithEventPublisher(nil, publisher)
		},
		Publisher: publisher,
	}).State()

	for i := 0; i < 200; i++ {
		entryIndex := uint64(i + 1)
		name := fmt.Sprintf("foo-%d", i)
		require.NoError(t, store.EnsureConfigEntry(entryIndex, &structs.IngressGatewayConfigEntry{
			Kind: structs.IngressGateway,
			Name: name,
		}))
	}

	go New(publisher, reconciler).Subscribe(&stream.SubscribeRequest{
		Topic:   state.EventTopicIngressGateway,
		Subject: stream.SubjectWildcard,
	}).WithWorkers(10).Run(ctx)

	received := []string{}
LOOP:
	for {
		select {
		case request := <-reconciler.received:
			require.Equal(t, structs.IngressGateway, request.Kind)
			received = append(received, request.Name)
			if len(received) == 200 {
				break LOOP
			}
		case <-ctx.Done():
			break LOOP
		}
	}

	// since we only modified each entry once, we should have exactly 200 reconciliation calls
	require.Len(t, received, 200)
	for i := 0; i < 200; i++ {
		require.Contains(t, received, fmt.Sprintf("foo-%d", i))
	}
}

func TestBasicController_Transform(t *testing.T) {
	t.Parallel()

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	reconciler := newTestReconciler(false)

	publisher := stream.NewEventPublisher(0)
	go publisher.Run(ctx)

	// get the store through the FSM since the publisher handlers get registered through it
	store := fsm.NewFromDeps(fsm.Deps{
		Logger: hclog.New(nil),
		NewStateStore: func() *state.Store {
			return state.NewStateStoreWithEventPublisher(nil, publisher)
		},
		Publisher: publisher,
	}).State()

	go New(publisher, reconciler).Subscribe(&stream.SubscribeRequest{
		Topic:   state.EventTopicIngressGateway,
		Subject: stream.SubjectWildcard,
	}, func(entry structs.ConfigEntry) []Request {
		return []Request{{
			Kind: "foo",
			Name: "bar",
		}}
	}).Run(ctx)

	require.NoError(t, store.EnsureConfigEntry(1, &structs.IngressGatewayConfigEntry{
		Kind: structs.IngressGateway,
		Name: "test",
	}))

	select {
	case request := <-reconciler.received:
		require.Equal(t, "foo", request.Kind)
		require.Equal(t, "bar", request.Name)
	case <-ctx.Done():
		t.Fatal("stopped reconciler before event received")
	}
}

func TestBasicController_Retry(t *testing.T) {
	t.Parallel()

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	reconciler := newTestReconciler(true)
	defer reconciler.stop()

	publisher := stream.NewEventPublisher(0)
	go publisher.Run(ctx)

	// get the store through the FSM since the publisher handlers get registered through it
	store := fsm.NewFromDeps(fsm.Deps{
		Logger: hclog.New(nil),
		NewStateStore: func() *state.Store {
			return state.NewStateStoreWithEventPublisher(nil, publisher)
		},
		Publisher: publisher,
	}).State()

	queueInitialized := make(chan *countingWorkQueue)
	controller := New(publisher, reconciler).Subscribe(&stream.SubscribeRequest{
		Topic:   state.EventTopicIngressGateway,
		Subject: stream.SubjectWildcard,
	}).WithWorkers(-1).WithBackoff(1*time.Millisecond, 1*time.Millisecond)
	go controller.WithQueueFactory(func(ctx context.Context, baseBackoff, maxBackoff time.Duration) WorkQueue {
		queue := newCountingWorkQueue(RunWorkQueue(ctx, baseBackoff, maxBackoff))
		queueInitialized <- queue
		return queue
	}).Run(ctx)

	queue := <-queueInitialized

	ensureCalled := func(request chan Request, name string) bool {
		// give a short window for our counters to update
		defer time.Sleep(10 * time.Millisecond)
		select {
		case req := <-request:
			require.Equal(t, structs.IngressGateway, req.Kind)
			require.Equal(t, name, req.Name)
			return true
		case <-time.After(10 * time.Millisecond):
			return false
		}
	}

	// check to make sure we are called once
	queue.reset()
	require.NoError(t, store.EnsureConfigEntry(1, &structs.IngressGatewayConfigEntry{
		Kind: structs.IngressGateway,
		Name: "foo-1",
	}))
	require.False(t, ensureCalled(reconciler.received, "foo-1"))
	require.EqualValues(t, 0, queue.dones())
	require.EqualValues(t, 0, queue.requeues())
	reconciler.step()
	require.True(t, ensureCalled(reconciler.received, "foo-1"))
	require.EqualValues(t, 1, queue.dones())
	require.EqualValues(t, 0, queue.requeues())

	// check that we requeue when an arbitrary error occurs
	queue.reset()
	reconciler.setResponse(errors.New("error"))
	require.NoError(t, store.EnsureConfigEntry(2, &structs.IngressGatewayConfigEntry{
		Kind: structs.IngressGateway,
		Name: "foo-2",
	}))
	require.False(t, ensureCalled(reconciler.received, "foo-2"))
	require.EqualValues(t, 0, queue.dones())
	require.EqualValues(t, 0, queue.requeues())
	require.EqualValues(t, 0, queue.addRateLimiteds())
	reconciler.step()
	// check that we were processed the first time and re-queued
	require.True(t, ensureCalled(reconciler.received, "foo-2"))
	require.EqualValues(t, 1, queue.dones())
	require.EqualValues(t, 1, queue.requeues())
	require.EqualValues(t, 1, queue.addRateLimiteds())
	// now make sure we succeed
	reconciler.setResponse(nil)
	reconciler.step()
	require.True(t, ensureCalled(reconciler.received, "foo-2"))
	require.EqualValues(t, 2, queue.dones())
	require.EqualValues(t, 1, queue.requeues())
	require.EqualValues(t, 1, queue.addRateLimiteds())

	// check that we requeue at a given rate when using a RequeueAfterError
	queue.reset()
	reconciler.setResponse(RequeueNow())
	require.NoError(t, store.EnsureConfigEntry(3, &structs.IngressGatewayConfigEntry{
		Kind: structs.IngressGateway,
		Name: "foo-3",
	}))
	require.False(t, ensureCalled(reconciler.received, "foo-3"))
	require.EqualValues(t, 0, queue.dones())
	require.EqualValues(t, 0, queue.requeues())
	require.EqualValues(t, 0, queue.addRateLimiteds())
	reconciler.step()
	// check that we were processed the first time and re-queued
	require.True(t, ensureCalled(reconciler.received, "foo-3"))
	require.EqualValues(t, 1, queue.dones())
	require.EqualValues(t, 1, queue.requeues())
	require.EqualValues(t, 1, queue.addAfters())
	// now make sure we succeed
	reconciler.setResponse(nil)
	reconciler.step()
	require.True(t, ensureCalled(reconciler.received, "foo-3"))
	require.EqualValues(t, 2, queue.dones())
	require.EqualValues(t, 1, queue.requeues())
	require.EqualValues(t, 1, queue.addAfters())
}

func TestBasicController_RunPanicAssertions(t *testing.T) {
	t.Parallel()

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	started := make(chan struct{})
	reconciler := newTestReconciler(false)
	publisher := stream.NewEventPublisher(0)
	controller := New(publisher, reconciler).WithQueueFactory(func(ctx context.Context, baseBackoff, maxBackoff time.Duration) WorkQueue {
		close(started)
		return RunWorkQueue(ctx, baseBackoff, maxBackoff)
	})
	subscription := &stream.SubscribeRequest{
		Topic:   state.EventTopicIngressGateway,
		Subject: stream.SubjectWildcard,
	}

	// kick off the controller
	go controller.Subscribe(subscription).Run(ctx)

	// wait to make sure the following assertions don't
	// get run before the above goroutine is spawned
	<-started

	// make sure we can't call Run again
	require.Panics(t, func() {
		controller.Run(ctx)
	})

	// make sure all of our configuration methods panic
	require.Panics(t, func() {
		controller.Subscribe(subscription)
	})
	require.Panics(t, func() {
		controller.WithBackoff(1, 10)
	})
	require.Panics(t, func() {
		controller.WithWorkers(1)
	})
	require.Panics(t, func() {
		controller.WithQueueFactory(RunWorkQueue)
	})
}
@@ -0,0 +1,224 @@
package controller

import (
	"container/heap"
	"context"
	"time"
)

// much of this is a re-implementation of
// https://github.com/kubernetes/client-go/blob/release-1.25/util/workqueue/delaying_queue.go

// DeferQueue is a generic priority queue implementation that
// allows for deferring and later processing Requests.
type DeferQueue interface {
	// Defer defers processing a Request until a given time. When
	// the timeout is hit, the request will be processed by the
	// callback given in the Process loop. If the given context
	// is canceled, the item is not deferred.
	Defer(ctx context.Context, item Request, until time.Time)
	// Process processes all items in the defer queue with the
	// given callback, blocking until the given context is canceled.
	// Callers should only ever call Process once, likely in a
	// long-lived goroutine.
	Process(ctx context.Context, callback func(item Request))
}
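
// As a usage sketch (mirroring how the work queue below wires this up), a
// single long-lived goroutine drains the queue while callers defer Requests
// for later processing; the tick value here is arbitrary:
//
//	func runDeferred(ctx context.Context, work WorkQueue) DeferQueue {
//		deferred := NewDeferQueue(500 * time.Millisecond)
//		go deferred.Process(ctx, func(item Request) {
//			// hand ready items back to the main work queue
//			work.Add(item)
//		})
//		return deferred
//	}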

// deferredRequest is a wrapped Request with information about
// when a retry should be attempted
type deferredRequest struct {
	enqueueAt time.Time
	item      Request
	// index holds the index for the given heap entry so that if
	// the entry is updated the heap can be re-sorted
	index int
}

// deferQueue is a priority queue for deferring Requests for
// future processing
type deferQueue struct {
	heap    *deferHeap
	entries map[Request]*deferredRequest

	addChannel     chan *deferredRequest
	heartbeat      *time.Ticker
	nextReadyTimer *time.Timer
}

// NewDeferQueue returns a priority queue for deferred Requests.
func NewDeferQueue(tick time.Duration) DeferQueue {
	dHeap := &deferHeap{}
	heap.Init(dHeap)

	return &deferQueue{
		heap:       dHeap,
		entries:    make(map[Request]*deferredRequest),
		addChannel: make(chan *deferredRequest),
		heartbeat:  time.NewTicker(tick),
	}
}

// Defer defers the given Request until the given time in the future. If the
// passed-in context is canceled before the Request is deferred, then this
// immediately returns.
func (q *deferQueue) Defer(ctx context.Context, item Request, until time.Time) {
	entry := &deferredRequest{
		enqueueAt: until,
		item:      item,
	}

	select {
	case <-ctx.Done():
	case q.addChannel <- entry:
	}
}

// deferEntry adds a deferred request to the priority queue
func (q *deferQueue) deferEntry(entry *deferredRequest) {
	existing, exists := q.entries[entry.item]
	if exists {
		// insert or update the item deferral time
		if existing.enqueueAt.After(entry.enqueueAt) {
			existing.enqueueAt = entry.enqueueAt
			heap.Fix(q.heap, existing.index)
		}

		return
	}

	heap.Push(q.heap, entry)
	q.entries[entry.item] = entry
}

// readyRequest returns a pointer to the next ready Request or
// nil if no Requests are ready to be processed
func (q *deferQueue) readyRequest() *Request {
	if q.heap.Len() == 0 {
		return nil
	}

	now := time.Now()

	entry := q.heap.Peek().(*deferredRequest)
	if entry.enqueueAt.After(now) {
		return nil
	}

	entry = heap.Pop(q.heap).(*deferredRequest)
	delete(q.entries, entry.item)
	return &entry.item
}

// signalReady returns a timer channel that fires when the next
// deferred Request on the queue becomes ready
func (q *deferQueue) signalReady() <-chan time.Time {
	if q.heap.Len() == 0 {
		return make(<-chan time.Time)
	}

	if q.nextReadyTimer != nil {
		q.nextReadyTimer.Stop()
	}
	now := time.Now()
	entry := q.heap.Peek().(*deferredRequest)
	q.nextReadyTimer = time.NewTimer(entry.enqueueAt.Sub(now))
	return q.nextReadyTimer.C
}

// Process processes all items in the defer queue with the
// given callback, blocking until the given context is canceled.
// Callers should only ever call Process once, likely in a
// long-lived goroutine.
func (q *deferQueue) Process(ctx context.Context, callback func(item Request)) {
	for {
		ready := q.readyRequest()
		if ready != nil {
			callback(*ready)
		}

		signalReady := q.signalReady()

		select {
		case <-ctx.Done():
			if q.nextReadyTimer != nil {
				q.nextReadyTimer.Stop()
			}
			q.heartbeat.Stop()
			return

		case <-q.heartbeat.C:
			// continue the loop, which processes ready items

		case <-signalReady:
			// continue the loop, which processes ready items

		case entry := <-q.addChannel:
			enqueueOrProcess := func(entry *deferredRequest) {
				now := time.Now()
				if entry.enqueueAt.After(now) {
					q.deferEntry(entry)
				} else {
					// fast path: process immediately if we don't need to defer
					callback(entry.item)
				}
			}

			enqueueOrProcess(entry)

			// drain the add channel before we do anything else
			drained := false
			for !drained {
				select {
				case entry := <-q.addChannel:
					enqueueOrProcess(entry)
				default:
					drained = true
				}
			}
		}
	}
}

var _ heap.Interface = &deferHeap{}

// deferHeap implements heap.Interface
type deferHeap []*deferredRequest

// Len returns the length of the heap.
func (h deferHeap) Len() int {
	return len(h)
}

// Less compares heap items for purposes of sorting.
func (h deferHeap) Less(i, j int) bool {
	return h[i].enqueueAt.Before(h[j].enqueueAt)
}

// Swap swaps two entries in the heap.
func (h deferHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
	h[i].index = i
	h[j].index = j
}

// Push pushes an entry onto the heap.
func (h *deferHeap) Push(x interface{}) {
	n := len(*h)
	item := x.(*deferredRequest)
	item.index = n
	*h = append(*h, item)
}

// Pop pops an entry off the heap.
func (h *deferHeap) Pop() interface{} {
	n := len(*h)
	item := (*h)[n-1]
	item.index = -1
	*h = (*h)[0 : n-1]
	return item
}

// Peek returns the next item on the heap.
func (h deferHeap) Peek() interface{} {
	return h[0]
}
@@ -0,0 +1,10 @@
// Package controller contains a re-implementation of the Kubernetes
// [controller-runtime](https://github.com/kubernetes-sigs/controller-runtime)
// with the core using Consul's event publishing pipeline rather than
// Kubernetes' client list/watch APIs.
//
// Generally this package enables defining asynchronous control loops
// meant to be run on a Consul cluster's leader that reconcile derived state
// in config entries that might be dependent on multiple sources.

package controller
@@ -0,0 +1,176 @@
package controller

import (
	"context"
	"sync"
	"time"
)

// much of this is a re-implementation of
// https://github.com/kubernetes/client-go/blob/release-1.25/util/workqueue/queue.go

// WorkQueue is an interface for a work queue with semantics to help with
// retries and rate limiting.
type WorkQueue interface {
	// Get retrieves the next Request in the queue, blocking until a Request is
	// available. If shutdown is true, then the queue is shutting down and should
	// no longer be used by the caller.
	Get() (item Request, shutdown bool)
	// Add immediately adds a Request to the work queue.
	Add(item Request)
	// AddAfter adds a Request to the work queue after a given amount of time.
	AddAfter(item Request, duration time.Duration)
	// AddRateLimited adds a Request to the work queue after the amount of time
	// specified by applying the queue's rate limiter.
	AddRateLimited(item Request)
	// Forget signals the queue to reset the rate-limiting for the given Request.
	Forget(item Request)
	// Done tells the work queue that the Request has been successfully processed
	// and can be deleted from the queue.
	Done(item Request)
}
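
// As a usage sketch mirroring the controller's worker loop: every Request
// returned by Get must be paired with a Done call, failures are re-added
// with rate-limiting, and successes are Forgotten so their backoff resets
// (the process callback is a stand-in for a Reconciler):
//
//	func worker(q WorkQueue, process func(Request) error) {
//		for {
//			item, shutdown := q.Get()
//			if shutdown {
//				return
//			}
//			if err := process(item); err != nil {
//				q.AddRateLimited(item) // retry later with backoff
//			} else {
//				q.Forget(item) // reset the backoff for this item
//			}
//			q.Done(item)
//		}
//	}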

// queue implements a rate-limited work queue
type queue struct {
	// queue holds an ordered list of Requests needing to be processed
	queue []Request

	// dirty holds the working set of all Requests, whether they are being
	// processed or not
	dirty map[Request]struct{}
	// processing holds the set of current requests being processed
	processing map[Request]struct{}

	// deferred is an internal priority queue that tracks deferred
	// Requests
	deferred DeferQueue
	// ratelimiter is the internal rate-limiter for the queue
	ratelimiter Limiter

	// cond synchronizes queue access and handles signalling for when
	// data is available in the queue
	cond *sync.Cond

	// ctx is the top-level context that, when canceled, shuts down the queue
	ctx context.Context
}

// RunWorkQueue returns a started WorkQueue that has per-Request exponential backoff rate-limiting.
// When the passed-in context is canceled, the queue shuts down.
func RunWorkQueue(ctx context.Context, baseBackoff, maxBackoff time.Duration) WorkQueue {
	q := &queue{
		ratelimiter: NewRateLimiter(baseBackoff, maxBackoff),
		dirty:       make(map[Request]struct{}),
		processing:  make(map[Request]struct{}),
		cond:        sync.NewCond(&sync.Mutex{}),
		deferred:    NewDeferQueue(500 * time.Millisecond),
		ctx:         ctx,
	}
	go q.start()

	return q
}

// start begins the asynchronous processing loop for the deferral queue
func (q *queue) start() {
	go q.deferred.Process(q.ctx, func(item Request) {
		q.Add(item)
	})

	<-q.ctx.Done()
	q.cond.Broadcast()
}

// shuttingDown returns whether the queue is in the process of shutting down
func (q *queue) shuttingDown() bool {
	select {
	case <-q.ctx.Done():
		return true
	default:
		return false
	}
}

// Get returns the next Request to be processed by the caller, blocking until
// an item is available in the queue. If the returned shutdown parameter is true,
// then the caller should stop using the queue. Any Requests returned by a call
// to Get must be explicitly marked as processed via the Done method.
func (q *queue) Get() (item Request, shutdown bool) {
	q.cond.L.Lock()
	defer q.cond.L.Unlock()
	for len(q.queue) == 0 && !q.shuttingDown() {
		q.cond.Wait()
	}
	if len(q.queue) == 0 {
		// We must be shutting down.
		return Request{}, true
	}

	item, q.queue = q.queue[0], q.queue[1:]

	q.processing[item] = struct{}{}
	delete(q.dirty, item)

	return item, false
}

// Add puts the given Request in the queue. If the Request is already in
// the queue or the queue is stopping, then this is a no-op.
func (q *queue) Add(item Request) {
	q.cond.L.Lock()
	defer q.cond.L.Unlock()
	if q.shuttingDown() {
		return
	}
	if _, ok := q.dirty[item]; ok {
		return
	}

	q.dirty[item] = struct{}{}
	if _, ok := q.processing[item]; ok {
		return
	}

	q.queue = append(q.queue, item)
	q.cond.Signal()
}

// AddAfter adds a Request to the work queue after a given amount of time.
func (q *queue) AddAfter(item Request, duration time.Duration) {
	// don't add if we're already shutting down
	if q.shuttingDown() {
		return
	}

	// immediately add if there is no delay
	if duration <= 0 {
		q.Add(item)
		return
	}

	q.deferred.Defer(q.ctx, item, time.Now().Add(duration))
}

// AddRateLimited adds the given Request to the queue after applying the
// rate limiter to determine when the Request should next be processed.
func (q *queue) AddRateLimited(item Request) {
	q.AddAfter(item, q.ratelimiter.NextRetry(item))
}

// Forget signals the queue to reset the rate-limiting for the given Request.
func (q *queue) Forget(item Request) {
	q.ratelimiter.Forget(item)
}

// Done removes the item from the queue; if it has been marked dirty
// again while being processed, it is re-added to the queue.
func (q *queue) Done(item Request) {
	q.cond.L.Lock()
	defer q.cond.L.Unlock()

	delete(q.processing, item)
	if _, ok := q.dirty[item]; ok {
		q.queue = append(q.queue, item)
		q.cond.Signal()
	}
}
@@ -0,0 +1,93 @@
package controller

import (
	"sync/atomic"
	"time"
)

var _ WorkQueue = &countingWorkQueue{}

type countingWorkQueue struct {
	getCounter            uint64
	addCounter            uint64
	addAfterCounter       uint64
	addRateLimitedCounter uint64
	forgetCounter         uint64
	doneCounter           uint64

	inner WorkQueue
}

func newCountingWorkQueue(inner WorkQueue) *countingWorkQueue {
	return &countingWorkQueue{
		inner: inner,
	}
}

func (c *countingWorkQueue) reset() {
	atomic.StoreUint64(&c.getCounter, 0)
	atomic.StoreUint64(&c.addCounter, 0)
	atomic.StoreUint64(&c.addAfterCounter, 0)
	atomic.StoreUint64(&c.addRateLimitedCounter, 0)
	atomic.StoreUint64(&c.forgetCounter, 0)
	atomic.StoreUint64(&c.doneCounter, 0)
}

func (c *countingWorkQueue) requeues() uint64 {
	return c.addAfters() + c.addRateLimiteds()
}

func (c *countingWorkQueue) Get() (item Request, shutdown bool) {
	item, shutdown = c.inner.Get()
	atomic.AddUint64(&c.getCounter, 1)
	return item, shutdown
}

func (c *countingWorkQueue) gets() uint64 {
	return atomic.LoadUint64(&c.getCounter)
}

func (c *countingWorkQueue) Add(item Request) {
	c.inner.Add(item)
	atomic.AddUint64(&c.addCounter, 1)
}

func (c *countingWorkQueue) adds() uint64 {
	return atomic.LoadUint64(&c.addCounter)
}

func (c *countingWorkQueue) AddAfter(item Request, duration time.Duration) {
	c.inner.AddAfter(item, duration)
	atomic.AddUint64(&c.addAfterCounter, 1)
}

func (c *countingWorkQueue) addAfters() uint64 {
	return atomic.LoadUint64(&c.addAfterCounter)
}

func (c *countingWorkQueue) AddRateLimited(item Request) {
	c.inner.AddRateLimited(item)
	atomic.AddUint64(&c.addRateLimitedCounter, 1)
}

func (c *countingWorkQueue) addRateLimiteds() uint64 {
	return atomic.LoadUint64(&c.addRateLimitedCounter)
}

func (c *countingWorkQueue) Forget(item Request) {
	c.inner.Forget(item)
	atomic.AddUint64(&c.forgetCounter, 1)
}

func (c *countingWorkQueue) forgets() uint64 {
	return atomic.LoadUint64(&c.forgetCounter)
}

func (c *countingWorkQueue) Done(item Request) {
	c.inner.Done(item)
	atomic.AddUint64(&c.doneCounter, 1)
}

func (c *countingWorkQueue) dones() uint64 {
	return atomic.LoadUint64(&c.doneCounter)
}
@@ -0,0 +1,70 @@
package controller

import (
	"math"
	"sync"
	"time"
)

// much of this is a re-implementation of:
// https://github.com/kubernetes/client-go/blob/release-1.25/util/workqueue/default_rate_limiters.go

// Limiter is an interface for a rate limiter that can limit
// the number of retries processed in the work queue.
type Limiter interface {
	// NextRetry returns the remaining time until the queue should
	// reprocess a Request.
	NextRetry(request Request) time.Duration
	// Forget causes the Limiter to reset the backoff for the Request.
	Forget(request Request)
}
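
// As a usage sketch (values taken from the tests below): with a 1ms base,
// successive NextRetry calls for the same Request back off exponentially
// until they are capped at the max, and Forget restarts the sequence:
//
//	limiter := NewRateLimiter(1*time.Millisecond, 1*time.Second)
//	req := Request{Kind: "example"} // hypothetical request
//	limiter.NextRetry(req)          // 1ms
//	limiter.NextRetry(req)          // 2ms
//	limiter.NextRetry(req)          // 4ms
//	limiter.Forget(req)
//	limiter.NextRetry(req)          // back to 1ms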

var _ Limiter = &ratelimiter{}

type ratelimiter struct {
	failures map[Request]int
	base     time.Duration
	max      time.Duration
	mutex    sync.RWMutex
}

// NewRateLimiter returns a Limiter that does per-item exponential
// backoff.
func NewRateLimiter(base, max time.Duration) Limiter {
	return &ratelimiter{
		failures: make(map[Request]int),
		base:     base,
		max:      max,
	}
}

// NextRetry returns the remaining time until the queue should
// reprocess a Request.
func (r *ratelimiter) NextRetry(request Request) time.Duration {
	// take the write lock since we mutate the failures map below
	r.mutex.Lock()
	defer r.mutex.Unlock()

	exponent := r.failures[request]
	r.failures[request]++

	backoff := float64(r.base.Nanoseconds()) * math.Pow(2, float64(exponent))
	// make sure we don't overflow time.Duration
	if backoff > math.MaxInt64 {
		return r.max
	}

	calculated := time.Duration(backoff)
	if calculated > r.max {
		return r.max
	}

	return calculated
}

// Forget causes the Limiter to reset the backoff for the Request.
func (r *ratelimiter) Forget(request Request) {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	delete(r.failures, request)
}
@@ -0,0 +1,62 @@
package controller

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

func TestRateLimiter_Backoff(t *testing.T) {
	t.Parallel()

	limiter := NewRateLimiter(1*time.Millisecond, 1*time.Second)

	request := Request{Kind: "one"}
	require.Equal(t, 1*time.Millisecond, limiter.NextRetry(request))
	require.Equal(t, 2*time.Millisecond, limiter.NextRetry(request))
	require.Equal(t, 4*time.Millisecond, limiter.NextRetry(request))
	require.Equal(t, 8*time.Millisecond, limiter.NextRetry(request))
	require.Equal(t, 16*time.Millisecond, limiter.NextRetry(request))

	requestTwo := Request{Kind: "two"}
	require.Equal(t, 1*time.Millisecond, limiter.NextRetry(requestTwo))
	require.Equal(t, 2*time.Millisecond, limiter.NextRetry(requestTwo))

	limiter.Forget(request)
	require.Equal(t, 1*time.Millisecond, limiter.NextRetry(request))
}

func TestRateLimiter_Overflow(t *testing.T) {
	t.Parallel()

	limiter := NewRateLimiter(1*time.Millisecond, 1000*time.Second)

	request := Request{Kind: "one"}
	for i := 0; i < 5; i++ {
		limiter.NextRetry(request)
	}
	// ensure we have a normally incrementing exponential backoff
	require.Equal(t, 32*time.Millisecond, limiter.NextRetry(request))

	overflow := Request{Kind: "overflow"}
	for i := 0; i < 1000; i++ {
		limiter.NextRetry(overflow)
	}
	// make sure we're capped at the passed-in max backoff
	require.Equal(t, 1000*time.Second, limiter.NextRetry(overflow))

	limiter = NewRateLimiter(1*time.Minute, 1000*time.Hour)

	for i := 0; i < 2; i++ {
		limiter.NextRetry(request)
	}
	// ensure we have a normally incrementing exponential backoff
	require.Equal(t, 4*time.Minute, limiter.NextRetry(request))

	for i := 0; i < 1000; i++ {
		limiter.NextRetry(overflow)
	}
	// make sure we're capped at the passed-in max backoff
	require.Equal(t, 1000*time.Hour, limiter.NextRetry(overflow))
}
@@ -0,0 +1,51 @@
package controller

import (
	"context"
	"fmt"
	"time"

	"github.com/hashicorp/consul/acl"
)

// Request contains the information necessary to reconcile a config entry.
// This includes only the information required to uniquely identify the
// config entry.
type Request struct {
	Kind string
	Name string
	Meta *acl.EnterpriseMeta
}

// RequeueAfterError is an error that allows a Reconciler to override the
// exponential backoff behavior of the Controller. Rather than applying
// the backoff algorithm, returning a RequeueAfterError causes the
// Controller to reschedule the Request at a given time in the future.
type RequeueAfterError time.Duration

// Error implements the error interface.
func (r RequeueAfterError) Error() string {
	return fmt.Sprintf("requeue at %s", time.Duration(r))
}

// RequeueAfter constructs a RequeueAfterError with the given duration
// setting.
func RequeueAfter(after time.Duration) error {
	return RequeueAfterError(after)
}

// RequeueNow constructs a RequeueAfterError that reschedules the Request
// immediately.
func RequeueNow() error {
	return RequeueAfterError(0)
}
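
// As an illustrative sketch (the reconciler type here is hypothetical),
// returning a RequeueAfterError reschedules the Request at a fixed delay
// instead of the rate limiter's exponential backoff:
//
//	type pollingReconciler struct{}
//
//	func (pollingReconciler) Reconcile(ctx context.Context, req Request) error {
//		// ...do the reconciliation work, then poll again in 30 seconds
//		return RequeueAfter(30 * time.Second)
//	}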

// Reconciler is the main implementation interface for Controllers. A Reconciler
// receives any change notifications for config entries that the controller is subscribed
// to and processes them with its Reconcile function.
type Reconciler interface {
	// Reconcile performs a reconciliation on the config entry referred to by the Request.
	// The Controller will requeue the Request to be processed again if an error is non-nil.
	// If no error is returned, the Request will be removed from the working queue.
	Reconcile(context.Context, Request) error
}
@@ -0,0 +1,61 @@
package controller

import (
	"context"
	"sync"
)

type testReconciler struct {
	received chan Request
	response error
	mutex    sync.Mutex
	stepChan chan struct{}
	stopChan chan struct{}
	ctx      context.Context
}

func (r *testReconciler) Reconcile(ctx context.Context, req Request) error {
	if r.stepChan != nil {
		select {
		case <-r.stopChan:
			return nil
		case <-r.stepChan:
		}
	}

	select {
	case <-r.stopChan:
		return nil
	case r.received <- req:
	}

	r.mutex.Lock()
	defer r.mutex.Unlock()
	return r.response
}

func (r *testReconciler) setResponse(err error) {
	r.mutex.Lock()
	defer r.mutex.Unlock()
	r.response = err
}

func (r *testReconciler) step() {
	r.stepChan <- struct{}{}
}

func (r *testReconciler) stop() {
	close(r.stopChan)
}

func newTestReconciler(stepping bool) *testReconciler {
	r := &testReconciler{
		received: make(chan Request, 1000),
		stopChan: make(chan struct{}),
	}
	if stepping {
		r.stepChan = make(chan struct{})
	}

	return r
}