Merge pull request #4325 from hashicorp/ca-pruning

connect/ca: add logic for pruning old stale RootCA entries
This commit is contained in:
Kyle Havlovitz 2018-07-09 09:05:41 -07:00 committed by GitHub
commit dacaf255b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 178 additions and 1 deletions

View File

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"reflect" "reflect"
"strings" "strings"
"time"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect"
@ -177,6 +178,7 @@ func (s *ConnectCA) ConfigurationSet(
newRoot := *r newRoot := *r
if newRoot.Active { if newRoot.Active {
newRoot.Active = false newRoot.Active = false
newRoot.RotatedOutAt = time.Now()
} }
newRoots = append(newRoots, &newRoot) newRoots = append(newRoots, &newRoot)
} }

View File

@ -28,7 +28,18 @@ const (
barrierWriteTimeout = 2 * time.Minute barrierWriteTimeout = 2 * time.Minute
) )
var minAutopilotVersion = version.Must(version.NewVersion("0.8.0")) var (
// caRootPruneInterval is how often we check for stale CARoots to remove.
caRootPruneInterval = time.Hour
// caRootExpireDuration is the duration after which an inactive root is considered
// "expired". Currently this is based on the default leaf cert TTL of 3 days.
caRootExpireDuration = 7 * 24 * time.Hour
// minAutopilotVersion is the minimum Consul version in which Autopilot features
// are supported.
minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))
)
// monitorLeadership is used to monitor if we acquire or lose our role // monitorLeadership is used to monitor if we acquire or lose our role
// as the leader in the Raft cluster. There is some work the leader is // as the leader in the Raft cluster. There is some work the leader is
@ -220,6 +231,8 @@ func (s *Server) establishLeadership() error {
return err return err
} }
s.startCARootPruning()
s.setConsistentReadReady() s.setConsistentReadReady()
return nil return nil
} }
@ -236,6 +249,8 @@ func (s *Server) revokeLeadership() error {
return err return err
} }
s.stopCARootPruning()
s.setCAProvider(nil, nil) s.setCAProvider(nil, nil)
s.resetConsistentReadReady() s.resetConsistentReadReady()
@ -550,6 +565,92 @@ func (s *Server) setCAProvider(newProvider ca.Provider, root *structs.CARoot) {
s.caProviderRoot = root s.caProviderRoot = root
} }
// startCARootPruning starts a goroutine that looks for stale CARoots
// and removes them from the state store.
func (s *Server) startCARootPruning() {
s.caPruningLock.Lock()
defer s.caPruningLock.Unlock()
if s.caPruningEnabled {
return
}
s.caPruningCh = make(chan struct{})
go func() {
ticker := time.NewTicker(caRootPruneInterval)
defer ticker.Stop()
for {
select {
case <-s.caPruningCh:
return
case <-ticker.C:
if err := s.pruneCARoots(); err != nil {
s.logger.Printf("[ERR] connect: error pruning CA roots: %v", err)
}
}
}
}()
s.caPruningEnabled = true
}
// pruneCARoots looks for any CARoots that have been rotated out and expired.
func (s *Server) pruneCARoots() error {
if !s.config.ConnectEnabled {
return nil
}
idx, roots, err := s.fsm.State().CARoots(nil)
if err != nil {
return err
}
var newRoots structs.CARoots
for _, r := range roots {
if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > caRootExpireDuration {
s.logger.Printf("[INFO] connect: pruning old unused root CA (ID: %s)", r.ID)
continue
}
newRoot := *r
newRoots = append(newRoots, &newRoot)
}
// Return early if there's nothing to remove.
if len(newRoots) == len(roots) {
return nil
}
// Commit the new root state.
var args structs.CARequest
args.Op = structs.CAOpSetRoots
args.Index = idx
args.Roots = newRoots
resp, err := s.raftApply(structs.ConnectCARequestType, args)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
return nil
}
// stopCARootPruning stops the CARoot pruning process.
func (s *Server) stopCARootPruning() {
s.caPruningLock.Lock()
defer s.caPruningLock.Unlock()
if !s.caPruningEnabled {
return
}
close(s.caPruningCh)
s.caPruningEnabled = false
}
// reconcileReaped is used to reconcile nodes that have failed and been reaped // reconcileReaped is used to reconcile nodes that have failed and been reaped
// from Serf but remain in the catalog. This is done by looking for unknown nodes with serfHealth checks registered. // from Serf but remain in the catalog. This is done by looking for unknown nodes with serfHealth checks registered.
// We generate a "reap" event to cause the node to be cleaned up. // We generate a "reap" event to cause the node to be cleaned up.

View File

@ -5,12 +5,14 @@ import (
"testing" "testing"
"time" "time"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api" "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/testrpc" "github.com/hashicorp/consul/testrpc"
"github.com/hashicorp/consul/testutil/retry" "github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/serf/serf" "github.com/hashicorp/serf/serf"
"github.com/stretchr/testify/require"
) )
func TestLeader_RegisterMember(t *testing.T) { func TestLeader_RegisterMember(t *testing.T) {
@ -1001,3 +1003,64 @@ func TestLeader_ACL_Initialization(t *testing.T) {
}) })
} }
} }
func TestLeader_CARootPruning(t *testing.T) {
t.Parallel()
caRootExpireDuration = 500 * time.Millisecond
caRootPruneInterval = 200 * time.Millisecond
require := require.New(t)
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// Get the current root
rootReq := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var rootList structs.IndexedCARoots
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
require.Len(rootList.Roots, 1)
oldRoot := rootList.Roots[0]
// Update the provider config to use a new private key, which should
// cause a rotation.
_, newKey, err := connect.GeneratePrivateKey()
require.NoError(err)
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": newKey,
"RootCert": "",
"RotationPeriod": 90 * 24 * time.Hour,
},
}
{
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
}
// Should have 2 roots now.
_, roots, err := s1.fsm.State().CARoots(nil)
require.NoError(err)
require.Len(roots, 2)
time.Sleep(caRootExpireDuration * 2)
// Now the old root should be pruned.
_, roots, err = s1.fsm.State().CARoots(nil)
require.NoError(err)
require.Len(roots, 1)
require.True(roots[0].Active)
require.NotEqual(roots[0].ID, oldRoot.ID)
}

View File

@ -107,6 +107,12 @@ type Server struct {
caProviderRoot *structs.CARoot caProviderRoot *structs.CARoot
caProviderLock sync.RWMutex caProviderLock sync.RWMutex
// caPruningCh is used to shut down the CA root pruning goroutine when we
// lose leadership.
caPruningCh chan struct{}
caPruningLock sync.RWMutex
caPruningEnabled bool
// Consul configuration // Consul configuration
config *Config config *Config

View File

@ -73,6 +73,11 @@ type CARoot struct {
// cannot be active. // cannot be active.
Active bool Active bool
// RotatedOutAt is the time at which this CA was removed from the state.
// This will only be set on roots that have been rotated out from being the
// active root.
RotatedOutAt time.Time `json:"-"`
RaftIndex RaftIndex
} }