Merge pull request #4325 from hashicorp/ca-pruning
connect/ca: add logic for pruning old stale RootCA entries
This commit is contained in:
commit
dacaf255b1
|
@ -5,6 +5,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/acl"
|
"github.com/hashicorp/consul/acl"
|
||||||
"github.com/hashicorp/consul/agent/connect"
|
"github.com/hashicorp/consul/agent/connect"
|
||||||
|
@ -177,6 +178,7 @@ func (s *ConnectCA) ConfigurationSet(
|
||||||
newRoot := *r
|
newRoot := *r
|
||||||
if newRoot.Active {
|
if newRoot.Active {
|
||||||
newRoot.Active = false
|
newRoot.Active = false
|
||||||
|
newRoot.RotatedOutAt = time.Now()
|
||||||
}
|
}
|
||||||
newRoots = append(newRoots, &newRoot)
|
newRoots = append(newRoots, &newRoot)
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,18 @@ const (
|
||||||
barrierWriteTimeout = 2 * time.Minute
|
barrierWriteTimeout = 2 * time.Minute
|
||||||
)
|
)
|
||||||
|
|
||||||
var minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
var (
|
||||||
|
// caRootPruneInterval is how often we check for stale CARoots to remove.
|
||||||
|
caRootPruneInterval = time.Hour
|
||||||
|
|
||||||
|
// caRootExpireDuration is the duration after which an inactive root is considered
|
||||||
|
// "expired". Currently this is 7 days, based on the default leaf cert TTL of 3 days.
|
||||||
|
caRootExpireDuration = 7 * 24 * time.Hour
|
||||||
|
|
||||||
|
// minAutopilotVersion is the minimum Consul version in which Autopilot features
|
||||||
|
// are supported.
|
||||||
|
minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
|
||||||
|
)
|
||||||
|
|
||||||
// monitorLeadership is used to monitor if we acquire or lose our role
|
// monitorLeadership is used to monitor if we acquire or lose our role
|
||||||
// as the leader in the Raft cluster. There is some work the leader is
|
// as the leader in the Raft cluster. There is some work the leader is
|
||||||
|
@ -220,6 +231,8 @@ func (s *Server) establishLeadership() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s.startCARootPruning()
|
||||||
|
|
||||||
s.setConsistentReadReady()
|
s.setConsistentReadReady()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -236,6 +249,8 @@ func (s *Server) revokeLeadership() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s.stopCARootPruning()
|
||||||
|
|
||||||
s.setCAProvider(nil, nil)
|
s.setCAProvider(nil, nil)
|
||||||
|
|
||||||
s.resetConsistentReadReady()
|
s.resetConsistentReadReady()
|
||||||
|
@ -550,6 +565,92 @@ func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
|
||||||
s.caProviderRoot = root
|
s.caProviderRoot = root
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startCARootPruning starts a goroutine that looks for stale CARoots
|
||||||
|
// and removes them from the state store.
|
||||||
|
func (s *Server) startCARootPruning() {
|
||||||
|
s.caPruningLock.Lock()
|
||||||
|
defer s.caPruningLock.Unlock()
|
||||||
|
|
||||||
|
if s.caPruningEnabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
s.caPruningCh = make(chan struct{})
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(caRootPruneInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-s.caPruningCh:
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
if err := s.pruneCARoots(); err != nil {
|
||||||
|
s.logger.Printf("[ERR] connect: error pruning CA roots: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
s.caPruningEnabled = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// pruneCARoots looks for any CARoots that have been rotated out and expired.
|
||||||
|
func (s *Server) pruneCARoots() error {
|
||||||
|
if !s.config.ConnectEnabled {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
idx, roots, err := s.fsm.State().CARoots(nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var newRoots structs.CARoots
|
||||||
|
for _, r := range roots {
|
||||||
|
if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > caRootExpireDuration {
|
||||||
|
s.logger.Printf("[INFO] connect: pruning old unused root CA (ID: %s)", r.ID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
newRoot := *r
|
||||||
|
newRoots = append(newRoots, &newRoot)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return early if there's nothing to remove.
|
||||||
|
if len(newRoots) == len(roots) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit the new root state.
|
||||||
|
var args structs.CARequest
|
||||||
|
args.Op = structs.CAOpSetRoots
|
||||||
|
args.Index = idx
|
||||||
|
args.Roots = newRoots
|
||||||
|
resp, err := s.raftApply(structs.ConnectCARequestType, args)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if respErr, ok := resp.(error); ok {
|
||||||
|
return respErr
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stopCARootPruning stops the CARoot pruning process.
|
||||||
|
func (s *Server) stopCARootPruning() {
|
||||||
|
s.caPruningLock.Lock()
|
||||||
|
defer s.caPruningLock.Unlock()
|
||||||
|
|
||||||
|
if !s.caPruningEnabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
close(s.caPruningCh)
|
||||||
|
s.caPruningEnabled = false
|
||||||
|
}
|
||||||
|
|
||||||
// reconcileReaped is used to reconcile nodes that have failed and been reaped
|
// reconcileReaped is used to reconcile nodes that have failed and been reaped
|
||||||
// from Serf but remain in the catalog. This is done by looking for unknown nodes with serfHealth checks registered.
|
// from Serf but remain in the catalog. This is done by looking for unknown nodes with serfHealth checks registered.
|
||||||
// We generate a "reap" event to cause the node to be cleaned up.
|
// We generate a "reap" event to cause the node to be cleaned up.
|
||||||
|
|
|
@ -5,12 +5,14 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/connect"
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
"github.com/hashicorp/consul/api"
|
"github.com/hashicorp/consul/api"
|
||||||
"github.com/hashicorp/consul/testrpc"
|
"github.com/hashicorp/consul/testrpc"
|
||||||
"github.com/hashicorp/consul/testutil/retry"
|
"github.com/hashicorp/consul/testutil/retry"
|
||||||
"github.com/hashicorp/net-rpc-msgpackrpc"
|
"github.com/hashicorp/net-rpc-msgpackrpc"
|
||||||
"github.com/hashicorp/serf/serf"
|
"github.com/hashicorp/serf/serf"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestLeader_RegisterMember(t *testing.T) {
|
func TestLeader_RegisterMember(t *testing.T) {
|
||||||
|
@ -1001,3 +1003,64 @@ func TestLeader_ACL_Initialization(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLeader_CARootPruning(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
caRootExpireDuration = 500 * time.Millisecond
|
||||||
|
caRootPruneInterval = 200 * time.Millisecond
|
||||||
|
|
||||||
|
require := require.New(t)
|
||||||
|
dir1, s1 := testServer(t)
|
||||||
|
defer os.RemoveAll(dir1)
|
||||||
|
defer s1.Shutdown()
|
||||||
|
codec := rpcClient(t, s1)
|
||||||
|
defer codec.Close()
|
||||||
|
|
||||||
|
testrpc.WaitForLeader(t, s1.RPC, "dc1")
|
||||||
|
|
||||||
|
// Get the current root
|
||||||
|
rootReq := &structs.DCSpecificRequest{
|
||||||
|
Datacenter: "dc1",
|
||||||
|
}
|
||||||
|
var rootList structs.IndexedCARoots
|
||||||
|
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
|
||||||
|
require.Len(rootList.Roots, 1)
|
||||||
|
oldRoot := rootList.Roots[0]
|
||||||
|
|
||||||
|
// Update the provider config to use a new private key, which should
|
||||||
|
// cause a rotation.
|
||||||
|
_, newKey, err := connect.GeneratePrivateKey()
|
||||||
|
require.NoError(err)
|
||||||
|
newConfig := &structs.CAConfiguration{
|
||||||
|
Provider: "consul",
|
||||||
|
Config: map[string]interface{}{
|
||||||
|
"PrivateKey": newKey,
|
||||||
|
"RootCert": "",
|
||||||
|
"RotationPeriod": 90 * 24 * time.Hour,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
{
|
||||||
|
args := &structs.CARequest{
|
||||||
|
Datacenter: "dc1",
|
||||||
|
Config: newConfig,
|
||||||
|
}
|
||||||
|
var reply interface{}
|
||||||
|
|
||||||
|
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should have 2 roots now.
|
||||||
|
_, roots, err := s1.fsm.State().CARoots(nil)
|
||||||
|
require.NoError(err)
|
||||||
|
require.Len(roots, 2)
|
||||||
|
|
||||||
|
time.Sleep(caRootExpireDuration * 2)
|
||||||
|
|
||||||
|
// Now the old root should be pruned.
|
||||||
|
_, roots, err = s1.fsm.State().CARoots(nil)
|
||||||
|
require.NoError(err)
|
||||||
|
require.Len(roots, 1)
|
||||||
|
require.True(roots[0].Active)
|
||||||
|
require.NotEqual(roots[0].ID, oldRoot.ID)
|
||||||
|
}
|
||||||
|
|
|
@ -107,6 +107,12 @@ type Server struct {
|
||||||
caProviderRoot *structs.CARoot
|
caProviderRoot *structs.CARoot
|
||||||
caProviderLock sync.RWMutex
|
caProviderLock sync.RWMutex
|
||||||
|
|
||||||
|
// caPruningCh is used to shut down the CA root pruning goroutine when we
|
||||||
|
// lose leadership.
|
||||||
|
caPruningCh chan struct{}
|
||||||
|
caPruningLock sync.RWMutex
|
||||||
|
caPruningEnabled bool
|
||||||
|
|
||||||
// Consul configuration
|
// Consul configuration
|
||||||
config *Config
|
config *Config
|
||||||
|
|
||||||
|
|
|
@ -73,6 +73,11 @@ type CARoot struct {
|
||||||
// cannot be active.
|
// cannot be active.
|
||||||
Active bool
|
Active bool
|
||||||
|
|
||||||
|
// RotatedOutAt is the time at which this CA was rotated out of the active role.
|
||||||
|
// This will only be set on roots that have been rotated out from being the
|
||||||
|
// active root.
|
||||||
|
RotatedOutAt time.Time `json:"-"`
|
||||||
|
|
||||||
RaftIndex
|
RaftIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue