From 51f079dcdde4f0c30620f5a5a5e1ac723050dace Mon Sep 17 00:00:00 2001
From: Hans Hasselberg
Date: Fri, 4 Sep 2020 11:47:16 +0200
Subject: [PATCH] secondaryIntermediateCertRenewalWatch abort on success (#8588)

secondaryIntermediateCertRenewalWatch was using `retryLoopBackoff` to renew
the intermediate certificate. Once it entered the inner loop and started
`retryLoopBackoff`, it would never leave that loop.
`retryLoopBackoffAbortOnSuccess` returns when renewal succeeds, as originally
intended.
---
 agent/consul/leader_connect.go      | 12 ++++++++-
 agent/consul/leader_connect_test.go | 41 +++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go
index fcad53d65..3a7a1ce97 100644
--- a/agent/consul/leader_connect.go
+++ b/agent/consul/leader_connect.go
@@ -653,7 +653,7 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro
 		case <-ctx.Done():
 			return nil
 		case <-time.After(structs.IntermediateCertRenewInterval):
-			retryLoopBackoff(ctx, func() error {
+			retryLoopBackoffAbortOnSuccess(ctx, func() error {
 				s.caProviderReconfigurationLock.Lock()
 				defer s.caProviderReconfigurationLock.Unlock()
 
@@ -835,6 +835,14 @@ func (s *Server) replicateIntentions(ctx context.Context) error {
 // retryLoopBackoff loops a given function indefinitely, backing off exponentially
 // upon errors up to a maximum of maxRetryBackoff seconds.
 func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error)) {
+	retryLoopBackoffHandleSuccess(ctx, loopFn, errFn, false)
+}
+
+func retryLoopBackoffAbortOnSuccess(ctx context.Context, loopFn func() error, errFn func(error)) {
+	retryLoopBackoffHandleSuccess(ctx, loopFn, errFn, true)
+}
+
+func retryLoopBackoffHandleSuccess(ctx context.Context, loopFn func() error, errFn func(error), abortOnSuccess bool) {
 	var failedAttempts uint
 	limiter := rate.NewLimiter(loopRateLimit, retryBucketSize)
 	for {
@@ -861,6 +869,8 @@ func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error
 			case <-timer.C:
 				continue
 			}
+		} else if abortOnSuccess {
+			return
 		}
 
 		// Reset the failed attempts after a successful run.
diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go
index 0f8edc470..c4852d57a 100644
--- a/agent/consul/leader_connect_test.go
+++ b/agent/consul/leader_connect_test.go
@@ -1,6 +1,7 @@
 package consul
 
 import (
+	"context"
 	"crypto/x509"
 	"fmt"
 	"io/ioutil"
@@ -1442,3 +1443,43 @@ func TestLeader_lessThanHalfTimePassed(t *testing.T) {
 
 	require.True(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(20*time.Second)))
 }
+
+func TestLeader_retryLoopBackoffHandleSuccess(t *testing.T) {
+	type test struct {
+		desc     string
+		loopFn   func() error
+		abort    bool
+		timedOut bool
+	}
+	success := func() error {
+		return nil
+	}
+	failure := func() error {
+		return fmt.Errorf("test error")
+	}
+	tests := []test{
+		{"loop without error and no abortOnSuccess keeps running", success, false, true},
+		{"loop with error and no abortOnSuccess keeps running", failure, false, true},
+		{"loop without error and abortOnSuccess is stopped", success, true, false},
+		{"loop with error and abortOnSuccess keeps running", failure, true, true},
+	}
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.desc, func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+			defer cancel()
+
+			retryLoopBackoffHandleSuccess(ctx, tc.loopFn, func(_ error) {}, tc.abort)
+			select {
+			case <-ctx.Done():
+				if !tc.timedOut {
+					t.Fatal("should not have timed out")
+				}
+			default:
+				if tc.timedOut {
+					t.Fatal("should have timed out")
+				}
+			}
+		})
+	}
+}
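
Editor's note: the hunks above show only the changed lines. Below is a minimal, self-contained sketch of the abort-on-success retry loop this patch introduces. It is not the Consul implementation: the name retryLoopSketch and the simple 100ms-per-failure backoff are illustrative only, and the real retryLoopBackoffHandleSuccess additionally rate-limits iterations with rate.NewLimiter(loopRateLimit, retryBucketSize) and caps the exponential backoff at maxRetryBackoff.

// Minimal sketch (not the Consul code) of the abort-on-success retry loop.
// It omits the rate limiter and capped exponential backoff of the real
// retryLoopBackoffHandleSuccess.
package main

import (
	"context"
	"fmt"
	"time"
)

func retryLoopSketch(ctx context.Context, loopFn func() error, errFn func(error), abortOnSuccess bool) {
	var failedAttempts uint
	for {
		if ctx.Err() != nil {
			return
		}
		if err := loopFn(); err != nil {
			errFn(err)
			failedAttempts++
			// Simplified backoff: grows linearly with the failure count.
			backoff := time.Duration(failedAttempts) * 100 * time.Millisecond
			select {
			case <-ctx.Done():
				return
			case <-time.After(backoff):
				continue
			}
		} else if abortOnSuccess {
			// The behavior this patch adds: a successful run ends the loop.
			return
		}
		// Reset the failure count after a successful run; the loop keeps
		// running when abortOnSuccess is false.
		failedAttempts = 0
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	attempts := 0
	retryLoopSketch(ctx, func() error {
		attempts++
		if attempts < 3 {
			return fmt.Errorf("intermediate cert not ready yet")
		}
		return nil // third attempt succeeds; loop returns because abortOnSuccess is true
	}, func(err error) {
		fmt.Println("retrying after error:", err)
	}, true)

	fmt.Printf("renewal succeeded after %d attempts\n", attempts)
}

With abortOnSuccess set to true, a successful renewal hands control back to the outer select in secondaryIntermediateCertRenewalWatch, so the watch waits for the next IntermediateCertRenewInterval instead of staying inside the retry loop indefinitely.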