diff --git a/agent/connect/ca/provider_consul_config.go b/agent/connect/ca/provider_consul_config.go index 0ef5fe140..9c94f0e62 100644 --- a/agent/connect/ca/provider_consul_config.go +++ b/agent/connect/ca/provider_consul_config.go @@ -33,6 +33,10 @@ func ParseConsulCAConfig(raw map[string]interface{}) (*structs.ConsulCAProviderC return nil, fmt.Errorf("must provide a private key when providing a root cert") } + if err := config.CommonCAProviderConfig.Validate(); err != nil { + return nil, err + } + return &config, nil } diff --git a/agent/connect/ca/provider_vault.go b/agent/connect/ca/provider_vault.go index 82a6d5c1f..8c04edc0b 100644 --- a/agent/connect/ca/provider_vault.go +++ b/agent/connect/ca/provider_vault.go @@ -172,7 +172,7 @@ func (v *VaultProvider) GenerateIntermediate() (string, error) { "allow_any_name": true, "allowed_uri_sans": "spiffe://*", "key_type": "any", - "max_ttl": "72h", + "max_ttl": fmt.Sprintf("%.0fm", v.config.LeafCertTTL.Minutes()), "require_cn": false, }) if err != nil { @@ -227,7 +227,7 @@ func (v *VaultProvider) Sign(csr *x509.CertificateRequest) (string, error) { // Use the leaf cert role to sign a new cert for this CSR. response, err := v.client.Logical().Write(v.config.IntermediatePKIPath+"sign/"+VaultCALeafCertRole, map[string]interface{}{ "csr": pemBuf.String(), - "ttl": fmt.Sprintf("%.0fh", v.config.LeafCertTTL.Hours()), + "ttl": fmt.Sprintf("%.0fm", v.config.LeafCertTTL.Minutes()), }) if err != nil { return "", fmt.Errorf("error issuing cert: %v", err) @@ -321,5 +321,9 @@ func ParseVaultCAConfig(raw map[string]interface{}) (*structs.VaultCAProviderCon config.IntermediatePKIPath += "/" } + if err := config.CommonCAProviderConfig.Validate(); err != nil { + return nil, err + } + return &config, nil } diff --git a/agent/consul/leader.go b/agent/consul/leader.go index bab926997..e959e365e 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -32,10 +32,6 @@ var ( // caRootPruneInterval is how often we check for stale CARoots to remove. caRootPruneInterval = time.Hour - // caRootExpireDuration is the duration after which an inactive root is considered - // "expired". Currently this is based on the default leaf cert TTL of 3 days. - caRootExpireDuration = 7 * 24 * time.Hour - // minAutopilotVersion is the minimum Consul version in which Autopilot features // are supported. minAutopilotVersion = version.Must(version.NewVersion("0.8.0")) @@ -601,14 +597,25 @@ func (s *Server) pruneCARoots() error { return nil } - idx, roots, err := s.fsm.State().CARoots(nil) + state := s.fsm.State() + idx, roots, err := state.CARoots(nil) + if err != nil { + return err + } + + _, caConf, err := state.CAConfig() + if err != nil { + return err + } + + common, err := caConf.GetCommonConfig() if err != nil { return err } var newRoots structs.CARoots for _, r := range roots { - if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > caRootExpireDuration { + if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > common.LeafCertTTL*2 { s.logger.Printf("[INFO] connect: pruning old unused root CA (ID: %s)", r.ID) continue } diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index 18769ff87..c56477abf 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -1008,7 +1008,6 @@ func TestLeader_ACL_Initialization(t *testing.T) { func TestLeader_CARootPruning(t *testing.T) { t.Parallel() - caRootExpireDuration = 500 * time.Millisecond caRootPruneInterval = 200 * time.Millisecond require := require.New(t) @@ -1036,9 +1035,11 @@ func TestLeader_CARootPruning(t *testing.T) { newConfig := &structs.CAConfiguration{ Provider: "consul", Config: map[string]interface{}{ + "LeafCertTTL": 500 * time.Millisecond, "PrivateKey": newKey, "RootCert": "", "RotationPeriod": 90 * 24 * time.Hour, + "SkipValidate": true, }, } { @@ -1056,7 +1057,7 @@ func TestLeader_CARootPruning(t *testing.T) { require.NoError(err) require.Len(roots, 2) - time.Sleep(caRootExpireDuration * 2) + time.Sleep(2 * time.Second) // Now the old root should be pruned. _, roots, err = s1.fsm.State().CARoots(nil) diff --git a/agent/structs/connect_ca.go b/agent/structs/connect_ca.go index 353275d78..1e869fd45 100644 --- a/agent/structs/connect_ca.go +++ b/agent/structs/connect_ca.go @@ -1,7 +1,10 @@ package structs import ( + "fmt" "time" + + "github.com/mitchellh/mapstructure" ) // IndexedCARoots is the list of currently trusted CA Roots. @@ -192,8 +195,49 @@ type CAConfiguration struct { RaftIndex } +func (c *CAConfiguration) GetCommonConfig() (*CommonCAProviderConfig, error) { + if c == nil { + return nil, fmt.Errorf("config map was nil") + } + + var config CommonCAProviderConfig + decodeConf := &mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + Result: &config, + } + + decoder, err := mapstructure.NewDecoder(decodeConf) + if err != nil { + return nil, err + } + + if err := decoder.Decode(c.Config); err != nil { + return nil, fmt.Errorf("error decoding config: %s", err) + } + + return &config, nil +} + type CommonCAProviderConfig struct { LeafCertTTL time.Duration + + SkipValidate bool +} + +func (c CommonCAProviderConfig) Validate() error { + if c.SkipValidate { + return nil + } + + if c.LeafCertTTL < time.Hour { + return fmt.Errorf("leaf cert TTL must be greater than 1h") + } + + if c.LeafCertTTL > 365*24*time.Hour { + return fmt.Errorf("leaf cert TTL must be less than 1 year") + } + + return nil } type ConsulCAProviderConfig struct { diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index ac93ccdbf..5f5258d8f 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -721,9 +721,16 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass

There are also a number of common configuration options supported by all providers:

- * `leaf_cert_ttl` The lease duration of - a leaf certificate issued for a service, after which a new certificate will be requested by the proxy. - Defaults to `72h`. + * `leaf_cert_ttl` The upper bound on the + lease duration of a leaf certificate issued for a service. In most cases a new leaf certificate will be + requested by a proxy before this limit is reached. This is also the effective limit on how long a server + outage can last (with no leader) before network connections will start being rejected, and as a result the + defaults is `72h` to last through a weekend without intervention. This value cannot be lower than 1 hour + or higher than 1 year. + + This value is also used when rotating out old root certificates from the cluster. When a root certificate + has been inactive (rotated out) for more than twice the *current* `leaf_cert_ttl`, it will be removed from + the trusted list. * `proxy` This object allows setting options for the Connect proxies. The following sub-keys are available: