Set a 1s floor for Vault renew operation backoff

This commit is contained in:
Mahmood Ali 2018-10-30 10:19:25 -04:00
parent 5b10da5de6
commit 55456fc823
2 changed files with 83 additions and 29 deletions

View File

@ -491,33 +491,9 @@ func (v *vaultClient) renewalLoop() {
break
}
// Back off, increasing the amount of backoff each time. There are some rules:
//
// * If we have an existing authentication that is going to expire,
// never back off more than half of the amount of time remaining
// until expiration
// * Never back off more than 30 seconds multiplied by a random
// value between 1 and 2
// * Use randomness so that many clients won't keep hitting Vault
// at the same time
// Set base values and add some backoff
v.logger.Warn("got error or bad auth, so backing off", "error", err)
switch {
case backoff < 5:
backoff = 5
case backoff >= 24:
backoff = 30
default:
backoff = backoff * 1.25
}
// Add randomness
backoff = backoff * (1.0 + rand.Float64())
maxBackoff := currentExpiration.Sub(time.Now()) / 2
if maxBackoff < 0 {
backoff = nextBackoff(backoff, currentExpiration)
if backoff < 0 {
// We have failed to renew the token past its expiration. Stop
// renewing with Vault.
v.logger.Error("failed to renew Vault token before lease expiration. Shutting down Vault client")
@ -526,9 +502,6 @@ func (v *vaultClient) renewalLoop() {
v.connEstablishedErr = err
v.l.Unlock()
return
} else if backoff > maxBackoff.Seconds() {
backoff = maxBackoff.Seconds()
}
durationUntilRetry := time.Duration(backoff) * time.Second
@ -539,6 +512,48 @@ func (v *vaultClient) renewalLoop() {
}
}
// nextBackoff returns the delay for the next auto renew interval, in seconds.
// Returns negative value if past expiration
//
// It should increaes the amount of backoff each time, with the following rules:
//
// * If we have an existing authentication that is going to expire,
// never back off more than half of the amount of time remaining
// until expiration (with 1s floor)
// * Never back off more than 30 seconds multiplied by a random
// value between 1 and 2
// * Use randomness so that many clients won't keep hitting Vault
// at the same time
func nextBackoff(backoff float64, expiry time.Time) float64 {
maxBackoff := time.Until(expiry) / 2
if maxBackoff < 0 {
return -1
}
switch {
case backoff < 5:
backoff = 5
case backoff >= 24:
backoff = 30
default:
backoff = backoff * 1.25
}
// Add randomness
backoff = backoff * (1.0 + rand.Float64())
if backoff > maxBackoff.Seconds() {
backoff = maxBackoff.Seconds()
}
// avoid hammering Vault
if backoff < 1 {
backoff = 1
}
return backoff
}
// renew attempts to renew our Vault token. If the renewal fails, an error is
// returned. This method updates the lastRenewed time
func (v *vaultClient) renew() error {

View File

@ -1258,3 +1258,42 @@ func waitForConnection(v *vaultClient, t *testing.T) {
t.Fatalf("Connection not established")
})
}
func TestVaultClient_nextBackoff(t *testing.T) {
simpleCases := []struct {
name string
initBackoff float64
// define range of acceptable backoff values accounting for random factor
rangeMin float64
rangeMax float64
}{
{"simple case", 7.0, 8.7, 17.60},
{"too low", 2.0, 5.0, 10.0},
{"too large", 100, 30.0, 60.0},
}
for _, c := range simpleCases {
t.Run(c.name, func(t *testing.T) {
b := nextBackoff(c.initBackoff, time.Now().Add(10*time.Hour))
if !(c.rangeMin <= b && b <= c.rangeMax) {
t.Fatalf("Expected backoff within [%v, %v] but found %v", c.rangeMin, c.rangeMax, b)
}
})
}
// some edge cases
t.Run("close to expiry", func(t *testing.T) {
b := nextBackoff(20, time.Now().Add(1100*time.Millisecond))
if !(1.0 <= b && b <= 3) {
t.Fatalf("Expected backoff within [1, 3] but found %v", b)
}
})
t.Run("past expiry", func(t *testing.T) {
b := nextBackoff(20, time.Now().Add(-1100*time.Millisecond))
if b >= 0.0 {
t.Fatalf("Expected backoff to be negative but found %v", b)
}
})
}