open-consul/agent/leafcert/util.go

64 lines
2.5 KiB
Go

package leafcert
import (
"time"
"github.com/hashicorp/consul/agent/structs"
)
// calculateSoftExpiry encapsulates our logic for when to renew a cert based on
// it's age. It returns a pair of times min, max which makes it easier to test
// the logic without non-deterministic jitter to account for. The caller should
// choose a time randomly in between these.
//
// We want to balance a few factors here:
// - renew too early and it increases the aggregate CSR rate in the cluster
// - renew too late and it risks disruption to the service if a transient
// error prevents the renewal
// - we want a broad amount of jitter so if there is an outage, we don't end
// up with all services in sync and causing a thundering herd every
// renewal period. Broader is better for smoothing requests but pushes
// both earlier and later tradeoffs above.
//
// Somewhat arbitrarily the current strategy looks like this:
//
// 0 60% 90%
// Issued [------------------------------|===============|!!!!!] Expires
// 72h TTL: 0 ~43h ~65h
// 1h TTL: 0 36m 54m
//
// Where |===| is the soft renewal period where we jitter for the first attempt
// and |!!!| is the danger zone where we just try immediately.
//
// In the happy path (no outages) the average renewal occurs half way through
// the soft renewal region or at 75% of the cert lifetime which is ~54 hours for
// a 72 hour cert, or 45 mins for a 1 hour cert.
//
// If we are already in the softRenewal period, we randomly pick a time between
// now and the start of the danger zone.
//
// We pass in now to make testing easier.
func calculateSoftExpiry(now time.Time, cert *structs.IssuedCert) (min time.Time, max time.Time) {
certLifetime := cert.ValidBefore.Sub(cert.ValidAfter)
if certLifetime < 10*time.Minute {
// Shouldn't happen as we limit to 1 hour shortest elsewhere but just be
// defensive against strange times or bugs.
return now, now
}
// Find the 60% mark in diagram above
softRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.6))
hardRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.9))
if now.After(hardRenewTime) {
// In the hard renew period, or already expired. Renew now!
return now, now
}
if now.After(softRenewTime) {
// Already in the soft renew period, make now the lower bound for jitter
softRenewTime = now
}
return softRenewTime, hardRenewTime
}