From 55456fc8230cb5e8b787ffb88cc19420bc76caf0 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Tue, 30 Oct 2018 10:19:25 -0400 Subject: [PATCH] Set a 1s floor for Vault renew operation backoff --- nomad/vault.go | 73 +++++++++++++++++++++++++++------------------ nomad/vault_test.go | 39 ++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/nomad/vault.go b/nomad/vault.go index 91fc30df0..b159f0611 100644 --- a/nomad/vault.go +++ b/nomad/vault.go @@ -491,33 +491,9 @@ func (v *vaultClient) renewalLoop() { break } - // Back off, increasing the amount of backoff each time. There are some rules: - // - // * If we have an existing authentication that is going to expire, - // never back off more than half of the amount of time remaining - // until expiration - // * Never back off more than 30 seconds multiplied by a random - // value between 1 and 2 - // * Use randomness so that many clients won't keep hitting Vault - // at the same time - - // Set base values and add some backoff - v.logger.Warn("got error or bad auth, so backing off", "error", err) - switch { - case backoff < 5: - backoff = 5 - case backoff >= 24: - backoff = 30 - default: - backoff = backoff * 1.25 - } - - // Add randomness - backoff = backoff * (1.0 + rand.Float64()) - - maxBackoff := currentExpiration.Sub(time.Now()) / 2 - if maxBackoff < 0 { + backoff = nextBackoff(backoff, currentExpiration) + if backoff < 0 { // We have failed to renew the token past its expiration. Stop // renewing with Vault. v.logger.Error("failed to renew Vault token before lease expiration. Shutting down Vault client") @@ -526,9 +502,6 @@ func (v *vaultClient) renewalLoop() { v.connEstablishedErr = err v.l.Unlock() return - - } else if backoff > maxBackoff.Seconds() { - backoff = maxBackoff.Seconds() } durationUntilRetry := time.Duration(backoff) * time.Second @@ -539,6 +512,48 @@ func (v *vaultClient) renewalLoop() { } } +// nextBackoff returns the delay for the next auto renew interval, in seconds. +// Returns negative value if past expiration +// +// It should increaes the amount of backoff each time, with the following rules: +// +// * If we have an existing authentication that is going to expire, +// never back off more than half of the amount of time remaining +// until expiration (with 1s floor) +// * Never back off more than 30 seconds multiplied by a random +// value between 1 and 2 +// * Use randomness so that many clients won't keep hitting Vault +// at the same time +func nextBackoff(backoff float64, expiry time.Time) float64 { + maxBackoff := time.Until(expiry) / 2 + if maxBackoff < 0 { + return -1 + } + + switch { + case backoff < 5: + backoff = 5 + case backoff >= 24: + backoff = 30 + default: + backoff = backoff * 1.25 + } + + // Add randomness + backoff = backoff * (1.0 + rand.Float64()) + + if backoff > maxBackoff.Seconds() { + backoff = maxBackoff.Seconds() + } + + // avoid hammering Vault + if backoff < 1 { + backoff = 1 + } + + return backoff +} + // renew attempts to renew our Vault token. If the renewal fails, an error is // returned. This method updates the lastRenewed time func (v *vaultClient) renew() error { diff --git a/nomad/vault_test.go b/nomad/vault_test.go index f7fdbddb8..191eea9df 100644 --- a/nomad/vault_test.go +++ b/nomad/vault_test.go @@ -1258,3 +1258,42 @@ func waitForConnection(v *vaultClient, t *testing.T) { t.Fatalf("Connection not established") }) } + +func TestVaultClient_nextBackoff(t *testing.T) { + simpleCases := []struct { + name string + initBackoff float64 + + // define range of acceptable backoff values accounting for random factor + rangeMin float64 + rangeMax float64 + }{ + {"simple case", 7.0, 8.7, 17.60}, + {"too low", 2.0, 5.0, 10.0}, + {"too large", 100, 30.0, 60.0}, + } + + for _, c := range simpleCases { + t.Run(c.name, func(t *testing.T) { + b := nextBackoff(c.initBackoff, time.Now().Add(10*time.Hour)) + if !(c.rangeMin <= b && b <= c.rangeMax) { + t.Fatalf("Expected backoff within [%v, %v] but found %v", c.rangeMin, c.rangeMax, b) + } + }) + } + + // some edge cases + t.Run("close to expiry", func(t *testing.T) { + b := nextBackoff(20, time.Now().Add(1100*time.Millisecond)) + if !(1.0 <= b && b <= 3) { + t.Fatalf("Expected backoff within [1, 3] but found %v", b) + } + }) + + t.Run("past expiry", func(t *testing.T) { + b := nextBackoff(20, time.Now().Add(-1100*time.Millisecond)) + if b >= 0.0 { + t.Fatalf("Expected backoff to be negative but found %v", b) + } + }) +}