Merge pull request #5479 from hashicorp/b-vault-renewal

vault: fix renewal time
This commit is contained in:
Michael Schurter 2019-04-16 12:20:26 -07:00 committed by GitHub
commit 3ba39e7c76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 27 deletions

View file

@ -4,12 +4,16 @@ FEATURES:
* vault: Add initial support for Vault namespaces [[GH-5520](https://github.com/hashicorp/nomad/pull/5520)]
* allocations: Add support for restarting allocations in-place [[GH-5502](https://github.com/hashicorp/nomad/pull/5502)]
IMPROVEMENTS:
* core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)]
* core: Add `-verbose` flag to `nomad status` wrapper command [[GH-5516](https://github.com/hashicorp/nomad/pull/5516)]
BUG FIXES:
* vault: Fix renewal time to be 1/2 lease duration with jitter [[GH-5479](https://github.com/hashicorp/nomad/issues/5479)]
## 0.9.0 (April 9, 2019)
__BACKWARDS INCOMPATIBILITIES:__

View file

@ -194,28 +194,36 @@ func (c *vaultClient) isTracked(id string) bool {
return ok
}
// isRunning returns true if the client is running.
func (c *vaultClient) isRunning() bool {
c.lock.RLock()
defer c.lock.RUnlock()
return c.running
}
// Starts the renewal loop of vault client
func (c *vaultClient) Start() {
c.lock.Lock()
defer c.lock.Unlock()
if !c.config.IsEnabled() || c.running {
return
}
c.lock.Lock()
c.running = true
c.lock.Unlock()
go c.run()
}
// Stops the renewal loop of vault client
func (c *vaultClient) Stop() {
c.lock.Lock()
defer c.lock.Unlock()
if !c.config.IsEnabled() || !c.running {
return
}
c.lock.Lock()
defer c.lock.Unlock()
c.running = false
close(c.stopCh)
}
@ -235,7 +243,7 @@ func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string)
if !c.config.IsEnabled() {
return nil, fmt.Errorf("vault client not enabled")
}
if !c.running {
if !c.isRunning() {
return nil, fmt.Errorf("vault client is not running")
}
@ -421,21 +429,9 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
}
}
duration := leaseDuration / 2
switch {
case leaseDuration < 30:
// Don't bother about introducing randomness if the
// leaseDuration is too small.
default:
// Give a breathing space of 20 seconds
min := 10
max := leaseDuration - min
rand.Seed(time.Now().Unix())
duration = min + rand.Intn(max-min)
}
// Determine the next renewal time
next := time.Now().Add(time.Duration(duration) * time.Second)
renewalDuration := renewalTime(rand.Intn, leaseDuration)
next := time.Now().Add(renewalDuration)
fatal := false
if renewalErr != nil &&
@ -445,7 +441,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
strings.Contains(renewalErr.Error(), "permission denied")) {
fatal = true
} else if renewalErr != nil {
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration)
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration)
c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying",
"error", renewalErr, "period", next)
}
@ -517,7 +513,7 @@ func (c *vaultClient) run() {
}
var renewalCh <-chan time.Time
for c.config.IsEnabled() && c.running {
for c.config.IsEnabled() && c.isRunning() {
// Fetches the candidate for next renewal
renewalReq, renewalTime := c.nextRenewal()
if renewalTime.IsZero() {
@ -728,3 +724,31 @@ func (h *vaultDataHeapImp) Pop() interface{} {
*h = old[0 : n-1]
return entry
}
// randIntn is the function in math/rand needed by renewalTime. A type is used
// to ease deterministic testing.
type randIntn func(int) int
// renewalTime returns when a token should be renewed given its leaseDuration
// and a randomizer to provide jitter.
//
// Leases < 1m will be not jitter.
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
// Start trying to renew at half the lease duration to allow ample time
// for latency and retries.
renew := leaseDuration / 2
// Don't bother about introducing randomness if the
// leaseDuration is too small.
const cutoff = 30
if renew < cutoff {
return time.Duration(renew) * time.Second
}
// jitter is the amount +/- to vary the renewal time
const jitter = 10
min := renew - jitter
renew = min + dice(jitter*2)
return time.Duration(renew) * time.Second
}

View file

@ -10,6 +10,7 @@ import (
"github.com/hashicorp/nomad/testutil"
vaultapi "github.com/hashicorp/vault/api"
vaultconsts "github.com/hashicorp/vault/helper/consts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -76,8 +77,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}(errCh)
}
if c.heap.Length() != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, c.heap.Length())
c.lock.Lock()
length := c.heap.Length()
c.lock.Unlock()
if length != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, length)
}
time.Sleep(time.Duration(testutil.TestMultiplier()) * time.Second)
@ -88,8 +92,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}
}
if c.heap.Length() != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", c.heap.Length())
c.lock.Lock()
length = c.heap.Length()
c.lock.Unlock()
if length != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", length)
}
}
@ -300,3 +307,44 @@ func TestVaultClient_RenewNonexistentLease(t *testing.T) {
t.Fatalf("expected \"%s\" or \"%s\" in error message, got \"%v\"", "lease not found", "lease is not renewable", err.Error())
}
}
// TestVaultClient_RenewalTime_Long asserts that for leases over 1m the renewal
// time is jittered.
func TestVaultClient_RenewalTime_Long(t *testing.T) {
t.Parallel()
// highRoller is a randIntn func that always returns the max value
highRoller := func(n int) int {
return n - 1
}
// lowRoller is a randIntn func that always returns the min value (0)
lowRoller := func(int) int {
return 0
}
assert.Equal(t, 39*time.Second, renewalTime(highRoller, 60))
assert.Equal(t, 20*time.Second, renewalTime(lowRoller, 60))
assert.Equal(t, 309*time.Second, renewalTime(highRoller, 600))
assert.Equal(t, 290*time.Second, renewalTime(lowRoller, 600))
const days3 = 60 * 60 * 24 * 3
assert.Equal(t, (days3/2+9)*time.Second, renewalTime(highRoller, days3))
assert.Equal(t, (days3/2-10)*time.Second, renewalTime(lowRoller, days3))
}
// TestVaultClient_RenewalTime_Short asserts that for leases under 1m the renewal
// time is lease/2.
func TestVaultClient_RenewalTime_Short(t *testing.T) {
t.Parallel()
dice := func(int) int {
require.Fail(t, "dice should not have been called")
panic("unreachable")
}
assert.Equal(t, 29*time.Second, renewalTime(dice, 58))
assert.Equal(t, 15*time.Second, renewalTime(dice, 30))
assert.Equal(t, 1*time.Second, renewalTime(dice, 2))
}