open-consul/agent/consul/connect_ca_endpoint_test.go

775 lines
22 KiB
Go
Raw Normal View History

2018-03-20 03:29:14 +00:00
package consul
import (
"crypto/x509"
"encoding/pem"
2018-05-08 13:23:44 +00:00
"fmt"
2018-03-20 03:29:14 +00:00
"os"
"sync"
2018-03-20 03:29:14 +00:00
"testing"
2018-04-30 03:44:40 +00:00
"time"
2018-03-20 03:29:14 +00:00
2018-05-08 13:23:44 +00:00
"github.com/stretchr/testify/require"
2018-03-20 03:29:14 +00:00
"github.com/hashicorp/consul/agent/connect"
2018-05-09 22:12:31 +00:00
ca "github.com/hashicorp/consul/agent/connect/ca"
2018-03-20 03:29:14 +00:00
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
2018-03-20 03:29:14 +00:00
"github.com/stretchr/testify/assert"
)
2018-04-30 03:44:40 +00:00
func testParseCert(t *testing.T, pemValue string) *x509.Certificate {
cert, err := connect.ParseCert(pemValue)
if err != nil {
t.Fatal(err)
}
return cert
}
2018-03-20 17:36:05 +00:00
// Test listing root CAs.
func TestConnectCARoots(t *testing.T) {
t.Parallel()
assert := assert.New(t)
2018-05-08 13:23:44 +00:00
require := require.New(t)
2018-03-20 17:36:05 +00:00
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
2018-03-20 17:36:05 +00:00
// Insert some CAs
state := s1.fsm.State()
ca1 := connect.TestCA(t, nil)
ca2 := connect.TestCA(t, nil)
ca2.Active = false
2018-04-30 03:44:40 +00:00
idx, _, err := state.CARoots(nil)
2018-05-08 13:23:44 +00:00
require.NoError(err)
2018-04-30 03:44:40 +00:00
ok, err := state.CARootSetCAS(idx, idx, []*structs.CARoot{ca1, ca2})
assert.True(ok)
2018-05-08 13:23:44 +00:00
require.NoError(err)
_, caCfg, err := state.CAConfig(nil)
2018-05-08 13:23:44 +00:00
require.NoError(err)
2018-03-20 17:36:05 +00:00
// Request
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.IndexedCARoots
2018-05-08 13:23:44 +00:00
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", args, &reply))
2018-03-20 17:36:05 +00:00
// Verify
assert.Equal(ca1.ID, reply.ActiveRootID)
assert.Len(reply.Roots, 2)
for _, r := range reply.Roots {
// These must never be set, for security
assert.Equal("", r.SigningCert)
assert.Equal("", r.SigningKey)
}
2018-05-08 13:23:44 +00:00
assert.Equal(fmt.Sprintf("%s.consul", caCfg.ClusterID), reply.TrustDomain)
2018-03-20 17:36:05 +00:00
}
2018-04-30 03:44:40 +00:00
func TestConnectCAConfig_GetSet(t *testing.T) {
t.Parallel()
assert := assert.New(t)
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
2018-04-30 03:44:40 +00:00
// Get the starting config
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.CAConfiguration
assert.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply))
2018-05-09 22:12:31 +00:00
actual, err := ca.ParseConsulCAConfig(reply.Config)
2018-04-30 03:44:40 +00:00
assert.NoError(err)
2018-05-09 22:12:31 +00:00
expected, err := ca.ParseConsulCAConfig(s1.config.CAConfig.Config)
2018-04-30 03:44:40 +00:00
assert.NoError(err)
assert.Equal(reply.Provider, s1.config.CAConfig.Provider)
assert.Equal(actual, expected)
}
testState := map[string]string{"foo": "bar"}
2018-04-30 03:44:40 +00:00
// Update a config value
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": "",
"RootCert": "",
"RotationPeriod": 180 * 24 * time.Hour,
// This verifies the state persistence for providers although Consul
// provider doesn't actually use that mechanism outside of tests.
"test_state": testState,
2018-04-30 03:44:40 +00:00
},
}
{
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}
retry.Run(t, func(r *retry.R) {
r.Check(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
})
2018-04-30 03:44:40 +00:00
}
// Verify the new config was set
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.CAConfiguration
assert.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply))
2018-05-09 22:12:31 +00:00
actual, err := ca.ParseConsulCAConfig(reply.Config)
2018-04-30 03:44:40 +00:00
assert.NoError(err)
2018-05-09 22:12:31 +00:00
expected, err := ca.ParseConsulCAConfig(newConfig.Config)
2018-04-30 03:44:40 +00:00
assert.NoError(err)
assert.Equal(reply.Provider, newConfig.Provider)
assert.Equal(actual, expected)
assert.Equal(testState, reply.State)
2018-04-30 03:44:40 +00:00
}
}
// This test case tests that the logic around forcing a rotation without cross
// signing works when requested (and is denied when not requested). This occurs
// if the current CA is not able to cross sign external CA certificates.
func TestConnectCAConfig_GetSetForceNoCrossSigning(t *testing.T) {
t.Parallel()
require := require.New(t)
// Setup a server with a built-in CA that as artificially disabled cross
// signing. This is simpler than running tests with external CA dependencies.
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.CAConfig.Config["DisableCrossSigning"] = true
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
// Store the current root
rootReq := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var rootList structs.IndexedCARoots
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
require.Len(rootList.Roots, 1)
oldRoot := rootList.Roots[0]
// Get the starting config
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.CAConfiguration
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply))
actual, err := ca.ParseConsulCAConfig(reply.Config)
require.NoError(err)
expected, err := ca.ParseConsulCAConfig(s1.config.CAConfig.Config)
require.NoError(err)
require.Equal(reply.Provider, s1.config.CAConfig.Provider)
require.Equal(actual, expected)
}
// Update to a new CA with different key. This should fail since the existing
// CA doesn't support cross signing so can't rotate safely.
_, newKey, err := connect.GeneratePrivateKey()
require.NoError(err)
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": newKey,
},
}
{
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}
err := msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply)
require.EqualError(err, "The current CA Provider does not support cross-signing. "+
"You can try again with ForceWithoutCrossSigningSet but this may cause disruption"+
" - see documentation for more.")
}
// Now try again with the force flag set and it should work
{
newConfig.ForceWithoutCrossSigning = true
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}
err := msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply)
require.NoError(err)
}
// Make sure the new root has been added but with no cross-signed intermediate
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.IndexedCARoots
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", args, &reply))
require.Len(reply.Roots, 2)
for _, r := range reply.Roots {
if r.ID == oldRoot.ID {
// The old root should no longer be marked as the active root,
// and none of its other fields should have changed.
require.False(r.Active)
require.Equal(r.Name, oldRoot.Name)
require.Equal(r.RootCert, oldRoot.RootCert)
require.Equal(r.SigningCert, oldRoot.SigningCert)
require.Equal(r.IntermediateCerts, oldRoot.IntermediateCerts)
} else {
// The new root should NOT have a valid cross-signed cert from the old
// root as an intermediate.
require.True(r.Active)
require.Empty(r.IntermediateCerts)
}
}
}
}
2018-04-30 03:44:40 +00:00
func TestConnectCAConfig_TriggerRotation(t *testing.T) {
t.Parallel()
assert := assert.New(t)
require := require.New(t)
2018-04-30 03:44:40 +00:00
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
2018-04-30 03:44:40 +00:00
// Store the current root
rootReq := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var rootList structs.IndexedCARoots
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
2018-04-30 03:44:40 +00:00
assert.Len(rootList.Roots, 1)
oldRoot := rootList.Roots[0]
// Update the provider config to use a new private key, which should
// cause a rotation.
2018-05-09 16:15:29 +00:00
_, newKey, err := connect.GeneratePrivateKey()
2018-04-30 03:44:40 +00:00
assert.NoError(err)
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": newKey,
"RootCert": "",
"RotationPeriod": 90 * 24 * time.Hour,
},
}
{
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
2018-04-30 03:44:40 +00:00
}
// Make sure the new root has been added along with an intermediate
// cross-signed by the old root.
var newRootPEM string
2018-04-30 03:44:40 +00:00
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.IndexedCARoots
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", args, &reply))
2018-04-30 03:44:40 +00:00
assert.Len(reply.Roots, 2)
for _, r := range reply.Roots {
if r.ID == oldRoot.ID {
// The old root should no longer be marked as the active root,
// and none of its other fields should have changed.
assert.False(r.Active)
assert.Equal(r.Name, oldRoot.Name)
assert.Equal(r.RootCert, oldRoot.RootCert)
assert.Equal(r.SigningCert, oldRoot.SigningCert)
assert.Equal(r.IntermediateCerts, oldRoot.IntermediateCerts)
} else {
newRootPEM = r.RootCert
2018-04-30 03:44:40 +00:00
// The new root should have a valid cross-signed cert from the old
// root as an intermediate.
assert.True(r.Active)
assert.Len(r.IntermediateCerts, 1)
xc := testParseCert(t, r.IntermediateCerts[0])
oldRootCert := testParseCert(t, oldRoot.RootCert)
newRootCert := testParseCert(t, r.RootCert)
// Should have the authority key ID and signature algo of the
2018-04-30 03:44:40 +00:00
// (old) signing CA.
assert.Equal(xc.AuthorityKeyId, oldRootCert.AuthorityKeyId)
assert.NotEqual(xc.SubjectKeyId, oldRootCert.SubjectKeyId)
2018-04-30 03:44:40 +00:00
assert.Equal(xc.SignatureAlgorithm, oldRootCert.SignatureAlgorithm)
// The common name and SAN should not have changed.
assert.Equal(xc.Subject.CommonName, newRootCert.Subject.CommonName)
assert.Equal(xc.URIs, newRootCert.URIs)
}
}
}
// Verify the new config was set.
{
args := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var reply structs.CAConfiguration
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationGet", args, &reply))
2018-04-30 03:44:40 +00:00
2018-05-09 22:12:31 +00:00
actual, err := ca.ParseConsulCAConfig(reply.Config)
require.NoError(err)
2018-05-09 22:12:31 +00:00
expected, err := ca.ParseConsulCAConfig(newConfig.Config)
require.NoError(err)
2018-04-30 03:44:40 +00:00
assert.Equal(reply.Provider, newConfig.Provider)
assert.Equal(actual, expected)
}
// Verify that new leaf certs get the cross-signed intermediate bundled
{
// Generate a CSR and request signing
spiffeId := connect.TestSpiffeIDService(t, "web")
2019-08-27 21:45:58 +00:00
csr, _ := connect.TestCSR(t, spiffeId)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var reply structs.IssuedCert
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply))
// Verify that the cert is signed by the new CA
{
roots := x509.NewCertPool()
require.True(roots.AppendCertsFromPEM([]byte(newRootPEM)))
leaf, err := connect.ParseCert(reply.CertPEM)
require.NoError(err)
_, err = leaf.Verify(x509.VerifyOptions{
Roots: roots,
})
require.NoError(err)
}
// And that it validates via the intermediate
{
roots := x509.NewCertPool()
assert.True(roots.AppendCertsFromPEM([]byte(oldRoot.RootCert)))
leaf, err := connect.ParseCert(reply.CertPEM)
require.NoError(err)
// Make sure the intermediate was returned as well as leaf
_, rest := pem.Decode([]byte(reply.CertPEM))
require.NotEmpty(rest)
intermediates := x509.NewCertPool()
require.True(intermediates.AppendCertsFromPEM(rest))
_, err = leaf.Verify(x509.VerifyOptions{
Roots: roots,
Intermediates: intermediates,
})
require.NoError(err)
}
// Verify other fields
assert.Equal("web", reply.Service)
assert.Equal(spiffeId.URI().String(), reply.ServiceURI)
}
2018-04-30 03:44:40 +00:00
}
2018-03-20 03:29:14 +00:00
// Test CA signing
func TestConnectCASign(t *testing.T) {
t.Parallel()
tests := []struct {
caKeyType string
caKeyBits int
}{
{
caKeyType: connect.DefaultPrivateKeyType,
caKeyBits: connect.DefaultPrivateKeyBits,
},
{
// Ensure that an RSA Keyed CA can sign EC leaves and they validate.
caKeyType: "rsa",
caKeyBits: 2048,
},
}
2018-03-20 03:29:14 +00:00
for _, tt := range tests {
t.Run(fmt.Sprintf("%s-%d", tt.caKeyType, tt.caKeyBits), func(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
dir1, s1 := testServerWithConfig(t, func(cfg *Config) {
cfg.CAConfig.Config["PrivateKeyType"] = tt.caKeyType
cfg.CAConfig.Config["PrivateKeyBits"] = tt.caKeyBits
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
2018-03-20 03:29:14 +00:00
testrpc.WaitForLeader(t, s1.RPC, "dc1")
2018-04-30 03:44:40 +00:00
// Generate a CSR and request signing
spiffeId := connect.TestSpiffeIDService(t, "web")
// TestCSR will always generate a CSR with an EC key currently.
csr, _ := connect.TestCSR(t, spiffeId)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var reply structs.IssuedCert
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply))
// Generate a second CSR and request signing
spiffeId2 := connect.TestSpiffeIDService(t, "web2")
csr, _ = connect.TestCSR(t, spiffeId2)
args = &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var reply2 structs.IssuedCert
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply2))
require.True(reply2.ModifyIndex > reply.ModifyIndex)
// Get the current CA
state := s1.fsm.State()
_, ca, err := state.CARootActive(nil)
require.NoError(err)
// Verify that the cert is signed by the CA
require.NoError(connect.ValidateLeaf(ca.RootCert, reply.CertPEM, nil))
// Verify other fields
assert.Equal("web", reply.Service)
assert.Equal(spiffeId.URI().String(), reply.ServiceURI)
})
}
2018-03-20 03:29:14 +00:00
}
// Bench how long Signing RPC takes. This was used to ballpark reasonable
// default rate limit to protect servers from thundering herds of signing
// requests on root rotation.
func BenchmarkConnectCASign(b *testing.B) {
t := &testing.T{}
require := require.New(b)
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// Generate a CSR and request signing
spiffeID := connect.TestSpiffeIDService(b, "web")
2019-08-27 21:45:58 +00:00
csr, _ := connect.TestCSR(b, spiffeID)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var reply structs.IssuedCert
b.ResetTimer()
for n := 0; n < b.N; n++ {
require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply))
}
}
func TestConnectCASign_rateLimit(t *testing.T) {
t.Parallel()
require := require.New(t)
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc1"
c.Bootstrap = true
c.CAConfig.Config = map[string]interface{}{
// It actually doesn't work as expected with some higher values because
// the token bucket is initialized with max(10%, 1) burst which for small
// values is 1 and then the test completes so fast it doesn't actually
// replenish any tokens so you only get the burst allowed through. This is
// OK, running the test slower is likely to be more brittle anyway since
// it will become more timing dependent whether the actual rate the
// requests are made matches the expectation from the sleeps etc.
"CSRMaxPerSecond": 1,
}
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// Generate a CSR and request signing a few times in a loop.
spiffeID := connect.TestSpiffeIDService(t, "web")
2019-08-27 21:45:58 +00:00
csr, _ := connect.TestCSR(t, spiffeID)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var reply structs.IssuedCert
errs := make([]error, 10)
for i := 0; i < len(errs); i++ {
errs[i] = msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply)
}
limitedCount := 0
successCount := 0
for _, err := range errs {
if err == nil {
successCount++
} else if err.Error() == ErrRateLimited.Error() {
limitedCount++
} else {
require.NoError(err)
}
}
// I've only ever seen this as 1/9 however if the test runs slowly on an
// over-subscribed CPU (e.g. in CI) it's possible that later requests could
// have had their token replenished and succeed so we allow a little slack -
// the test here isn't really the exact token bucket response more a sanity
// check that some limiting is being applied. Note that we can't just measure
// the time it took to send them all and infer how many should have succeeded
// without some complex modeling of the token bucket algorithm.
require.Truef(successCount >= 1, "at least 1 CSRs should have succeeded, got %d", successCount)
require.Truef(limitedCount >= 7, "at least 7 CSRs should have been rate limited, got %d", limitedCount)
}
func TestConnectCASign_concurrencyLimit(t *testing.T) {
t.Parallel()
require := require.New(t)
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc1"
c.Bootstrap = true
c.CAConfig.Config = map[string]interface{}{
// Must disable the rate limit since it takes precedence
"CSRMaxPerSecond": 0,
"CSRMaxConcurrent": 1,
}
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// Generate a CSR and request signing a few times in a loop.
spiffeID := connect.TestSpiffeIDService(t, "web")
2019-08-27 21:45:58 +00:00
csr, _ := connect.TestCSR(t, spiffeID)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
}
var wg sync.WaitGroup
errs := make(chan error, 10)
times := make(chan time.Duration, cap(errs))
start := time.Now()
for i := 0; i < cap(errs); i++ {
wg.Add(1)
go func() {
defer wg.Done()
codec := rpcClient(t, s1)
defer codec.Close()
var reply structs.IssuedCert
errs <- msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply)
times <- time.Since(start)
}()
}
wg.Wait()
close(errs)
limitedCount := 0
successCount := 0
var minTime, maxTime time.Duration
for err := range errs {
elapsed := <-times
if elapsed < minTime || minTime == 0 {
minTime = elapsed
}
if elapsed > maxTime {
maxTime = elapsed
}
if err == nil {
successCount++
} else if err.Error() == ErrRateLimited.Error() {
limitedCount++
} else {
require.NoError(err)
}
}
// These are very hand wavy - on my mac times look like this:
// 2.776009ms
// 3.705813ms
// 4.527212ms
// 5.267755ms
// 6.119809ms
// 6.958083ms
// 7.869179ms
// 8.675058ms
// 9.512281ms
// 10.238183ms
//
// But it's indistinguishable from noise - even if you disable the concurrency
// limiter you get pretty much the same pattern/spread.
//
// On the other hand it's only timing that stops us from not hitting the 500ms
// timeout. On highly CPU constrained CI box this could be brittle if we
// assert that we never get rate limited.
//
// So this test is not super strong - but it's a sanity check at least that
// things don't break when configured this way, and through manual
// inspection/debug logging etc. we can verify it's actually doing the
// concurrency limit thing. If you add a 100ms sleep into the sign endpoint
// after the rate limit code for example it makes it much more obvious:
//
// With 100ms sleep an no concurrency limit:
// min=109ms, max=118ms
// With concurrency limit of 1:
// min=106ms, max=538ms (with ~half hitting the 500ms timeout)
//
// Without instrumenting the endpoint to make the RPC take an artificially
// long time it's hard to know what else we can do to actively detect that the
// requests were serialized.
t.Logf("min=%s, max=%s", minTime, maxTime)
//t.Fail() // Uncomment to see the time spread logged
require.Truef(successCount >= 1, "at least 1 CSRs should have succeeded, got %d", successCount)
}
func TestConnectCASignValidation(t *testing.T) {
t.Parallel()
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.ACLDatacenter = "dc1"
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
c.ACLsEnabled = true
c.ACLMasterToken = "root"
c.ACLDefaultPolicy = "deny"
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// Create an ACL token with service:write for web*
var webToken string
{
arg := structs.ACLRequest{
Datacenter: "dc1",
Op: structs.ACLSet,
ACL: structs.ACL{
Name: "User token",
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
Type: structs.ACLTokenTypeClient,
Rules: `
service "web" {
policy = "write"
}`,
},
WriteRequest: structs.WriteRequest{Token: "root"},
}
require.NoError(t, msgpackrpc.CallWithCodec(codec, "ACL.Apply", &arg, &webToken))
}
testWebID := connect.TestSpiffeIDService(t, "web")
tests := []struct {
name string
id connect.CertURI
wantErr string
}{
{
name: "different cluster",
id: &connect.SpiffeIDService{
Host: "55555555-4444-3333-2222-111111111111.consul",
Namespace: testWebID.Namespace,
Datacenter: testWebID.Datacenter,
Service: testWebID.Service,
},
wantErr: "different trust domain",
},
{
name: "same cluster should validate",
id: testWebID,
wantErr: "",
},
{
name: "same cluster, CSR for a different DC should NOT validate",
id: &connect.SpiffeIDService{
Host: testWebID.Host,
Namespace: testWebID.Namespace,
Datacenter: "dc2",
Service: testWebID.Service,
},
wantErr: "different datacenter",
},
{
name: "same cluster and DC, different service should not have perms",
id: &connect.SpiffeIDService{
Host: testWebID.Host,
Namespace: testWebID.Namespace,
Datacenter: testWebID.Datacenter,
Service: "db",
},
wantErr: "Permission denied",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
2019-08-27 21:45:58 +00:00
csr, _ := connect.TestCSR(t, tt.id)
args := &structs.CASignRequest{
Datacenter: "dc1",
CSR: csr,
WriteRequest: structs.WriteRequest{Token: webToken},
}
var reply structs.IssuedCert
err := msgpackrpc.CallWithCodec(codec, "ConnectCA.Sign", args, &reply)
if tt.wantErr == "" {
require.NoError(t, err)
// No other validation that is handled in different tests
} else {
require.Error(t, err)
require.Contains(t, err.Error(), tt.wantErr)
}
})
}
}