open-consul/agent/consul/leader_intentions_test.go

589 lines
16 KiB
Go
Raw Normal View History

connect: intentions are now managed as a new config entry kind "service-intentions" (#8834) - Upgrade the ConfigEntry.ListAll RPC to be kind-aware so that older copies of consul will not see new config entries it doesn't understand replicate down. - Add shim conversion code so that the old API/CLI method of interacting with intentions will continue to work so long as none of these are edited via config entry endpoints. Almost all of the read-only APIs will continue to function indefinitely. - Add new APIs that operate on individual intentions without IDs so that the UI doesn't need to implement CAS operations. - Add a new serf feature flag indicating support for intentions-as-config-entries. - The old line-item intentions way of interacting with the state store will transparently flip between the legacy memdb table and the config entry representations so that readers will never see a hiccup during migration where the results are incomplete. It uses a piece of system metadata to control the flip. - The primary datacenter will begin migrating intentions into config entries on startup once all servers in the datacenter are on a version of Consul with the intentions-as-config-entries feature flag. When it is complete the old state store representations will be cleared. We also record a piece of system metadata indicating this has occurred. We use this metadata to skip ALL of this code the next time the leader starts up. - The secondary datacenters continue to run the old intentions replicator until all servers in the secondary DC and primary DC support intentions-as-config-entries (via serf flag). Once this condition it met the old intentions replicator ceases. - The secondary datacenters replicate the new config entries as they are migrated in the primary. When they detect that the primary has zeroed it's old state store table it waits until all config entries up to that point are replicated and then zeroes its own copy of the old state store table. We also record a piece of system metadata indicating this has occurred. We use this metadata to skip ALL of this code the next time the leader starts up.
2020-10-06 18:24:05 +00:00
package consul
import (
"os"
"strings"
"testing"
"time"
"github.com/hashicorp/consul/agent/structs"
tokenStore "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestLeader_ReplicateIntentions(t *testing.T) {
// This setup is a little hacky, but should work. We spin up BOTH servers with
// no intentions and force them to think they're not eligible for intentions
// config entries yet by overriding serf tags.
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc1"
c.ACLDatacenter = "dc1"
c.ACLsEnabled = true
c.ACLMasterToken = "root"
c.ACLDefaultPolicy = "deny"
c.Build = "1.6.0"
c.OverrideInitialSerfTags = func(tags map[string]string) {
tags["ft_si"] = "0"
}
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()
waitForLeaderEstablishment(t, s1)
retry.Run(t, func(r *retry.R) {
if s1.DatacenterSupportsIntentionsAsConfigEntries() {
r.Fatal("server 1 shouldn't activate service-intentions")
}
})
s1.tokens.UpdateAgentToken("root", tokenStore.TokenSourceConfig)
replicationRules := `acl = "read" service_prefix "" { policy = "read" intentions = "read" } operator = "write" `
// create some tokens
replToken1, err := upsertTestTokenWithPolicyRules(codec, "root", "dc1", replicationRules)
require.NoError(t, err)
replToken2, err := upsertTestTokenWithPolicyRules(codec, "root", "dc1", replicationRules)
require.NoError(t, err)
// dc2 as a secondary DC
dir2, s2 := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc2"
c.ACLDatacenter = "dc1"
c.ACLsEnabled = true
c.ACLDefaultPolicy = "deny"
c.ACLTokenReplication = false
c.Build = "1.6.0"
c.OverrideInitialSerfTags = func(tags map[string]string) {
tags["ft_si"] = "0"
}
})
defer os.RemoveAll(dir2)
defer s2.Shutdown()
s2.tokens.UpdateAgentToken("root", tokenStore.TokenSourceConfig)
// start out with one token
s2.tokens.UpdateReplicationToken(replToken1.SecretID, tokenStore.TokenSourceConfig)
// Create the WAN link
joinWAN(t, s2, s1)
waitForLeaderEstablishment(t, s2)
retry.Run(t, func(r *retry.R) {
if s2.DatacenterSupportsIntentionsAsConfigEntries() {
r.Fatal("server 2 shouldn't activate service-intentions")
}
})
legacyApply := func(s *Server, req *structs.IntentionRequest) error {
if req.Op != structs.IntentionOpDelete {
// Do these directly on the inputs so it's corrected for future
// equality checks.
req.Intention.CreatedAt = time.Now().UTC()
req.Intention.UpdatedAt = req.Intention.CreatedAt
//nolint:staticcheck
req.Intention.UpdatePrecedence()
//nolint:staticcheck
require.NoError(t, req.Intention.Validate())
//nolint:staticcheck
req.Intention.SetHash()
}
req2 := *req
req2.Intention = req.Intention.Clone()
if req.Op != structs.IntentionOpDelete {
req2.Intention.Hash = req.Intention.Hash // not part of Clone
}
resp, err := s.raftApply(structs.IntentionRequestType, req2)
if err != nil {
return err
}
if respErr, ok := resp.(error); ok {
return respErr
}
return nil
}
// Directly insert legacy intentions into raft in dc1.
id := generateUUID()
ixn := structs.IntentionRequest{
Datacenter: "dc1",
WriteRequest: structs.WriteRequest{Token: "root"},
Op: structs.IntentionOpCreate,
Intention: &structs.Intention{
ID: id,
SourceNS: structs.IntentionDefaultNamespace,
SourceName: "test",
DestinationNS: structs.IntentionDefaultNamespace,
DestinationName: "test",
Action: structs.IntentionActionAllow,
SourceType: structs.IntentionSourceConsul,
Meta: map[string]string{},
},
}
require.NoError(t, legacyApply(s1, &ixn))
// Wait for it to get replicated to dc2
var createdAt time.Time
retry.Run(t, func(r *retry.R) {
req := &structs.IntentionQueryRequest{
Datacenter: "dc2",
QueryOptions: structs.QueryOptions{Token: "root"},
IntentionID: ixn.Intention.ID,
}
var resp structs.IndexedIntentions
require.NoError(r, s2.RPC("Intention.Get", req, &resp), "ID=%q", ixn.Intention.ID)
require.Len(r, resp.Intentions, 1)
actual := resp.Intentions[0]
createdAt = actual.CreatedAt
})
// Sleep a bit so that the UpdatedAt field will definitely be different
time.Sleep(1 * time.Millisecond)
// delete underlying acl token being used for replication
require.NoError(t, deleteTestToken(codec, "root", "dc1", replToken1.AccessorID))
// switch to the other token
s2.tokens.UpdateReplicationToken(replToken2.SecretID, tokenStore.TokenSourceConfig)
// Update the intention in dc1
ixn.Op = structs.IntentionOpUpdate
ixn.Intention.ID = id
ixn.Intention.SourceName = "*"
require.NoError(t, legacyApply(s1, &ixn))
// Wait for dc2 to get the update
var resp structs.IndexedIntentions
retry.Run(t, func(r *retry.R) {
req := &structs.IntentionQueryRequest{
Datacenter: "dc2",
QueryOptions: structs.QueryOptions{Token: "root"},
IntentionID: ixn.Intention.ID,
}
require.NoError(r, s2.RPC("Intention.Get", req, &resp), "ID=%q", ixn.Intention.ID)
require.Len(r, resp.Intentions, 1)
require.Equal(r, "*", resp.Intentions[0].SourceName)
})
actual := resp.Intentions[0]
require.Equal(t, createdAt, actual.CreatedAt)
require.WithinDuration(t, time.Now(), actual.UpdatedAt, 5*time.Second)
actual.CreateIndex, actual.ModifyIndex = 0, 0
actual.CreatedAt = ixn.Intention.CreatedAt
actual.UpdatedAt = ixn.Intention.UpdatedAt
//nolint:staticcheck
ixn.Intention.UpdatePrecedence()
require.Equal(t, ixn.Intention, actual)
// Delete
require.NoError(t, legacyApply(s1, &structs.IntentionRequest{
Datacenter: "dc1",
WriteRequest: structs.WriteRequest{Token: "root"},
Op: structs.IntentionOpDelete,
Intention: &structs.Intention{
ID: ixn.Intention.ID,
},
}))
// Wait for the delete to be replicated
retry.Run(t, func(r *retry.R) {
req := &structs.IntentionQueryRequest{
Datacenter: "dc2",
QueryOptions: structs.QueryOptions{Token: "root"},
IntentionID: ixn.Intention.ID,
}
var resp structs.IndexedIntentions
err := s2.RPC("Intention.Get", req, &resp)
require.Error(r, err)
if !strings.Contains(err.Error(), ErrIntentionNotFound.Error()) {
r.Fatalf("expected intention not found, got: %v", err)
}
})
}
//nolint:staticcheck
func TestLeader_batchLegacyIntentionUpdates(t *testing.T) {
t.Parallel()
assert := assert.New(t)
ixn1 := structs.TestIntention(t)
ixn1.ID = "ixn1"
ixn2 := structs.TestIntention(t)
ixn2.ID = "ixn2"
ixnLarge := structs.TestIntention(t)
ixnLarge.ID = "ixnLarge"
ixnLarge.Description = strings.Repeat("x", maxIntentionTxnSize-1)
cases := []struct {
deletes structs.Intentions
updates structs.Intentions
expected []structs.TxnOps
}{
// 1 deletes, 0 updates
{
deletes: structs.Intentions{ixn1},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpDelete,
Intention: ixn1,
},
},
},
},
},
// 0 deletes, 1 updates
{
updates: structs.Intentions{ixn1},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixn1,
},
},
},
},
},
// 1 deletes, 1 updates
{
deletes: structs.Intentions{ixn1},
updates: structs.Intentions{ixn2},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpDelete,
Intention: ixn1,
},
},
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixn2,
},
},
},
},
},
// 1 large intention update
{
updates: structs.Intentions{ixnLarge},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixnLarge,
},
},
},
},
},
// 2 deletes (w/ a large intention), 1 updates
{
deletes: structs.Intentions{ixn1, ixnLarge},
updates: structs.Intentions{ixn2},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpDelete,
Intention: ixn1,
},
},
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpDelete,
Intention: ixnLarge,
},
},
},
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixn2,
},
},
},
},
},
// 1 deletes , 2 updates (w/ a large intention)
{
deletes: structs.Intentions{ixn1},
updates: structs.Intentions{ixnLarge, ixn2},
expected: []structs.TxnOps{
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpDelete,
Intention: ixn1,
},
},
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixnLarge,
},
},
},
{
&structs.TxnOp{
Intention: &structs.TxnIntentionOp{
Op: structs.IntentionOpUpdate,
Intention: ixn2,
},
},
},
},
},
}
for _, tc := range cases {
actual := batchLegacyIntentionUpdates(tc.deletes, tc.updates)
assert.Equal(tc.expected, actual)
}
}
func TestLeader_LegacyIntentionMigration(t *testing.T) {
// This setup is a little hacky, but should work. We spin up a server with
// no intentions and force it to think it's not eligible for intentions
// config entries yet by overriding serf tags.
//
// Then we directly write legacy intentions into raft. This is mimicking
// what a service-intentions aware server might do if an older copy of
// consul was still leader.
//
// This lets us generate a snapshot+raft state containing legacy intentions
// without having to spin up an old version of consul for the test.
//
// Then we shut it down and bring up a new copy on that datadir which
// should then trigger migration code.
dir1pre, s1pre := testServerWithConfig(t, func(c *Config) {
c.Datacenter = "dc1"
c.Build = "1.6.0"
c.OverrideInitialSerfTags = func(tags map[string]string) {
tags["ft_si"] = "0"
}
})
defer os.RemoveAll(dir1pre)
defer s1pre.Shutdown()
testrpc.WaitForLeader(t, s1pre.RPC, "dc1")
retry.Run(t, func(r *retry.R) {
if s1pre.DatacenterSupportsIntentionsAsConfigEntries() {
r.Fatal("server 1 shouldn't activate service-intentions")
}
})
// Insert a bunch of legacy intentions.
makeIxn := func(src, dest string, allow bool) *structs.Intention {
ixn := &structs.Intention{
ID: generateUUID(),
SourceNS: structs.IntentionDefaultNamespace,
SourceName: src,
DestinationNS: structs.IntentionDefaultNamespace,
DestinationName: dest,
SourceType: structs.IntentionSourceConsul,
Meta: map[string]string{},
}
if allow {
ixn.Action = structs.IntentionActionAllow
} else {
ixn.Action = structs.IntentionActionDeny
}
//nolint:staticcheck
ixn.UpdatePrecedence()
//nolint:staticcheck
ixn.SetHash()
return ixn
}
ixns := []*structs.Intention{
makeIxn("api", "db", true),
makeIxn("web", "db", false),
makeIxn("*", "web", true),
makeIxn("*", "api", false),
makeIxn("intern", "*", false),
makeIxn("contractor", "*", false),
makeIxn("*", "*", true),
}
for _, ixn := range ixns {
ixn2 := *ixn
resp, err := s1pre.raftApply(structs.IntentionRequestType, &structs.IntentionRequest{
Op: structs.IntentionOpCreate,
Intention: &ixn2,
})
require.NoError(t, err)
if respErr, ok := resp.(error); ok {
t.Fatalf("respErr: %v", respErr)
}
}
mapify := func(ixns []*structs.Intention) map[string]*structs.Intention {
m := make(map[string]*structs.Intention)
for _, ixn := range ixns {
m[ixn.ID] = ixn
}
return m
}
checkIntentions := func(t *testing.T, srv *Server, legacyOnly bool, expect map[string]*structs.Intention) {
t.Helper()
wildMeta := structs.WildcardEnterpriseMeta()
retry.Run(t, func(r *retry.R) {
var (
got structs.Intentions
err error
)
if legacyOnly {
_, got, err = srv.fsm.State().LegacyIntentions(nil, wildMeta)
} else {
_, got, _, err = srv.fsm.State().Intentions(nil, wildMeta)
}
require.NoError(r, err)
gotM := mapify(got)
assert.Len(r, gotM, len(expect))
for k, expectV := range expect {
gotV, ok := gotM[k]
if !ok {
r.Errorf("results are missing key %q", k)
continue
}
assert.Equal(r, expectV.ID, gotV.ID)
assert.Equal(r, expectV.SourceNS, gotV.SourceNS)
assert.Equal(r, expectV.SourceName, gotV.SourceName)
assert.Equal(r, expectV.DestinationNS, gotV.DestinationNS)
assert.Equal(r, expectV.DestinationName, gotV.DestinationName)
assert.Equal(r, expectV.Action, gotV.Action)
assert.Equal(r, expectV.Meta, gotV.Meta)
assert.Equal(r, expectV.Precedence, gotV.Precedence)
assert.Equal(r, expectV.SourceType, gotV.SourceType)
}
})
}
expectM := mapify(ixns)
checkIntentions(t, s1pre, false, expectM)
checkIntentions(t, s1pre, true, expectM)
// Shutdown s1pre and restart it to trigger migration.
s1pre.Shutdown()
dir1, s1 := testServerWithConfig(t, func(c *Config) {
c.DataDir = s1pre.config.DataDir
c.Datacenter = "dc1"
c.NodeName = s1pre.config.NodeName
c.NodeID = s1pre.config.NodeID
})
defer os.RemoveAll(dir1)
defer s1.Shutdown()
testrpc.WaitForLeader(t, s1.RPC, "dc1")
// check that all 7 intentions are present before migration
checkIntentions(t, s1, false, expectM)
// Wait until the migration routine is complete.
retry.Run(t, func(r *retry.R) {
intentionFormat, err := s1.getSystemMetadata(structs.SystemMetadataIntentionFormatKey)
require.NoError(r, err)
if intentionFormat != structs.SystemMetadataIntentionFormatConfigValue {
r.Fatal("intention migration is not yet complete")
}
})
// check that all 7 intentions are present the general way after migration
checkIntentions(t, s1, false, expectM)
// check that no intentions exist in the legacy table
checkIntentions(t, s1, true, map[string]*structs.Intention{})
mapifyConfigs := func(entries interface{}) map[structs.ConfigEntryKindName]*structs.ServiceIntentionsConfigEntry {
m := make(map[structs.ConfigEntryKindName]*structs.ServiceIntentionsConfigEntry)
switch v := entries.(type) {
case []*structs.ServiceIntentionsConfigEntry:
for _, entry := range v {
kn := structs.NewConfigEntryKindName(entry.Kind, entry.Name, &entry.EnterpriseMeta)
m[kn] = entry
}
case []structs.ConfigEntry:
for _, entry := range v {
kn := structs.NewConfigEntryKindName(entry.GetKind(), entry.GetName(), entry.GetEnterpriseMeta())
m[kn] = entry.(*structs.ServiceIntentionsConfigEntry)
}
default:
t.Fatalf("bad type: %T", entries)
}
return m
}
// also check config entries
_, gotConfigs, err := s1.fsm.State().ConfigEntriesByKind(nil, structs.ServiceIntentions, structs.WildcardEnterpriseMeta())
require.NoError(t, err)
gotConfigsM := mapifyConfigs(gotConfigs)
expectConfigs := structs.MigrateIntentions(ixns)
for _, entry := range expectConfigs {
require.NoError(t, entry.LegacyNormalize()) // tidy them up the same way the write would
}
expectConfigsM := mapifyConfigs(expectConfigs)
assert.Len(t, gotConfigsM, len(expectConfigsM))
for kn, expectV := range expectConfigsM {
gotV, ok := gotConfigsM[kn]
if !ok {
t.Errorf("results are missing key %q", kn)
continue
}
// Migrated intentions won't have toplevel Meta.
assert.Nil(t, gotV.Meta)
require.Len(t, gotV.Sources, len(expectV.Sources))
expSrcMap := make(map[string]*structs.SourceIntention)
for i, src := range expectV.Sources {
require.NotEmpty(t, src.LegacyID, "index[%d] missing LegacyID", i)
// Do a shallow copy and strip the times from the copy
src2 := *src
src2.LegacyCreateTime = nil
src2.LegacyUpdateTime = nil
expSrcMap[src2.LegacyID] = &src2
}
for i, got := range gotV.Sources {
require.NotEmpty(t, got.LegacyID, "index[%d] missing LegacyID", i)
// Do a shallow copy and strip the times from the copy
got2 := *got
got2.LegacyCreateTime = nil
got2.LegacyUpdateTime = nil
cmp, ok := expSrcMap[got2.LegacyID]
require.True(t, ok, "missing %q", got2.LegacyID)
assert.Equal(t, cmp, &got2, "index[%d]", i)
}
}
}