open-consul/agent/consul/leader_intentions_test.go
R.B. Boyer d6dce2332a
connect: intentions are now managed as a new config entry kind "service-intentions" (#8834)
- Upgrade the ConfigEntry.ListAll RPC to be kind-aware so that older
copies of Consul will not have new config entry kinds they don't
understand replicated down to them.
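
A rough sketch of the kind-awareness idea (illustrative only; the type and
function names below are hypothetical, not the actual RPC code): the leader
simply omits any entry kinds the requesting server did not advertise support
for, so an older server never receives "service-intentions" entries.

    // Hypothetical sketch of kind-aware replication filtering.
    package main

    import "fmt"

    type ConfigEntry struct {
        Kind, Name string
    }

    // filterByKnownKinds drops entry kinds the requester did not advertise,
    // so an older server never sees kinds it can't decode.
    func filterByKnownKinds(entries []ConfigEntry, knownKinds map[string]bool) []ConfigEntry {
        out := entries[:0:0]
        for _, e := range entries {
            if knownKinds[e.Kind] {
                out = append(out, e)
            }
        }
        return out
    }

    func main() {
        entries := []ConfigEntry{
            {Kind: "service-defaults", Name: "web"},
            {Kind: "service-intentions", Name: "db"},
        }
        old := map[string]bool{"service-defaults": true} // a pre-upgrade server
        fmt.Println(filterByKnownKinds(entries, old))    // [{service-defaults web}]
    }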

- Add shim conversion code so that the old API/CLI method of interacting
with intentions will continue to work so long as none of them are
edited via the config entry endpoints. Almost all of the read-only APIs
will continue to function indefinitely.
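
The core of the shim is a grouping conversion, which the migration test below
exercises via structs.MigrateIntentions. A minimal sketch of that shape (all
type names here are simplified stand-ins, not the real structs): each legacy
intention becomes one source entry on a "service-intentions" config entry
keyed by its destination service.

    // Hypothetical sketch: fold legacy intentions into per-destination entries.
    package main

    import "fmt"

    type LegacyIntention struct {
        ID, SourceName, DestinationName, Action string
    }

    type SourceIntention struct {
        Name, Action, LegacyID string
    }

    type ServiceIntentionsEntry struct {
        Kind, Name string // Name is the destination service
        Sources    []SourceIntention
    }

    // migrate groups legacy intentions by destination, one config entry each,
    // preserving the legacy ID on each source for round-tripping.
    func migrate(ixns []LegacyIntention) map[string]*ServiceIntentionsEntry {
        byDest := make(map[string]*ServiceIntentionsEntry)
        for _, ixn := range ixns {
            entry, ok := byDest[ixn.DestinationName]
            if !ok {
                entry = &ServiceIntentionsEntry{Kind: "service-intentions", Name: ixn.DestinationName}
                byDest[ixn.DestinationName] = entry
            }
            entry.Sources = append(entry.Sources, SourceIntention{
                Name: ixn.SourceName, Action: ixn.Action, LegacyID: ixn.ID,
            })
        }
        return byDest
    }

    func main() {
        out := migrate([]LegacyIntention{
            {ID: "a", SourceName: "web", DestinationName: "db", Action: "allow"},
            {ID: "b", SourceName: "api", DestinationName: "db", Action: "deny"},
        })
        fmt.Println(out["db"].Sources) // both sources folded into one "db" entry
    }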

- Add new APIs that operate on individual intentions without IDs so that
the UI doesn't need to implement CAS operations.
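
The point of the ID-less APIs is that a caller can address an intention by its
(source, destination) pair and blindly upsert it. A toy sketch of the idea
(hypothetical names; not the actual endpoint code):

    // Hypothetical sketch: exact-match addressing instead of UUID + CAS.
    package main

    import "fmt"

    type exactKey struct{ Source, Destination string }

    type store struct{ byName map[exactKey]string } // value: action

    // Upsert is last-write-wins: no read-modify-write or index check needed.
    func (s *store) Upsert(src, dst, action string) {
        s.byName[exactKey{src, dst}] = action
    }

    func (s *store) Get(src, dst string) (string, bool) {
        a, ok := s.byName[exactKey{src, dst}]
        return a, ok
    }

    func main() {
        s := &store{byName: map[exactKey]string{}}
        s.Upsert("web", "db", "allow")
        fmt.Println(s.Get("web", "db")) // allow true
    }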

- Add a new serf feature flag indicating support for
intentions-as-config-entries.
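
The tests below override exactly this flag via the "ft_si" serf tag. A sketch
of the gating logic (the lowercase function here is illustrative, assuming "1"
marks support; the test calls the real DatacenterSupportsIntentionsAsConfigEntries):

    // Sketch: a datacenter activates the feature only when every server
    // member advertises support via its serf tags.
    package main

    import "fmt"

    type member struct{ Tags map[string]string }

    func datacenterSupportsIntentionsAsConfigEntries(members []member) bool {
        for _, m := range members {
            if m.Tags["ft_si"] != "1" {
                return false // one old server holds the whole DC back
            }
        }
        return true
    }

    func main() {
        ms := []member{
            {Tags: map[string]string{"ft_si": "1"}},
            {Tags: map[string]string{"ft_si": "0"}}, // what the tests force
        }
        fmt.Println(datacenterSupportsIntentionsAsConfigEntries(ms)) // false
    }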

- The old line-item intentions way of interacting with the state store
will transparently flip between the legacy memdb table and the config
entry representations, so that readers never see a hiccup during
migration where the results are incomplete. A piece of system metadata
controls the flip.
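
The migration test below waits on that metadata key
(structs.SystemMetadataIntentionFormatKey reaching
structs.SystemMetadataIntentionFormatConfigValue). A sketch of the read-path
flip, with illustrative key/value strings standing in for those constants:

    // Sketch: one metadata value decides which representation reads come
    // from, so readers never observe a partial mix mid-migration.
    package main

    import "fmt"

    type state struct {
        meta          map[string]string
        legacyTable   []string // legacy intention IDs
        configEntries []string // migrated intention IDs
    }

    func (s *state) Intentions() []string {
        if s.meta["intention-format"] == "config-entry" {
            return s.configEntries // flip is atomic with the metadata write
        }
        return s.legacyTable
    }

    func main() {
        s := &state{meta: map[string]string{}, legacyTable: []string{"a", "b"}}
        fmt.Println(s.Intentions()) // [a b] — still served from the legacy table
        s.configEntries = []string{"a", "b"}
        s.meta["intention-format"] = "config-entry"
        fmt.Println(s.Intentions()) // [a b] — now served from config entries
    }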

- The primary datacenter will begin migrating intentions into config
entries on startup once all servers in the datacenter are on a version
of Consul with the intentions-as-config-entries feature flag. When the
migration is complete, the old state store representations will be
cleared. We also record a piece of system metadata indicating this has
occurred. We use this metadata to skip ALL of this code the next time
the leader starts up.
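
Putting those pieces together, the leader-side flow looks roughly like this
(a minimal sketch under the assumptions above; every name is hypothetical):

    // Sketch: migrate once, record completion, skip everything on later boots.
    package main

    import "fmt"

    type leader struct {
        meta        map[string]string
        allSupport  func() bool // all servers advertise the ft_si flag?
        migrateOnce func()      // batch-convert legacy intentions, clear table
    }

    func (l *leader) maybeMigrateIntentions() {
        if l.meta["intention-format"] == "config-entry" {
            return // a previous leader already finished; skip ALL of this
        }
        if !l.allSupport() {
            return // wait until every server supports the feature
        }
        l.migrateOnce()
        l.meta["intention-format"] = "config-entry"
    }

    func main() {
        l := &leader{
            meta:        map[string]string{},
            allSupport:  func() bool { return true },
            migrateOnce: func() { fmt.Println("migrated") },
        }
        l.maybeMigrateIntentions() // migrated
        l.maybeMigrateIntentions() // no-op: metadata short-circuits
    }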

- The secondary datacenters continue to run the old intentions
replicator until all servers in the secondary DC and primary DC support
intentions-as-config-entries (via the serf flag). Once this condition
is met, the old intentions replicator ceases.

- The secondary datacenters replicate the new config entries as they are
migrated in the primary. When a secondary detects that the primary has
zeroed its old state store table, it waits until all config entries up
to that point are replicated and then zeroes its own copy of the old
state store table. We also record a piece of system metadata indicating
this has occurred. We use this metadata to skip ALL of this code the
next time the leader starts up.
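
A sketch of that secondary-DC hand-off under the same assumptions (indexes and
field names here are illustrative): the secondary only zeroes its legacy table
once config-entry replication has passed the index at which the primary zeroed
its own.

    // Sketch: wait for replication to catch up, then zero the local copy.
    package main

    import "fmt"

    type secondary struct {
        meta            map[string]string
        replicatedIndex uint64 // highest config-entry index replicated so far
        legacyTable     []string
    }

    func (s *secondary) maybeFinishMigration(primaryZeroedAtIndex uint64) {
        if s.meta["intention-format"] == "config-entry" {
            return // already done during a previous leadership term
        }
        if s.replicatedIndex < primaryZeroedAtIndex {
            return // entries up to that point are not all local yet
        }
        s.legacyTable = nil // safe: every migrated entry is already replicated
        s.meta["intention-format"] = "config-entry"
    }

    func main() {
        s := &secondary{meta: map[string]string{}, replicatedIndex: 90, legacyTable: []string{"a"}}
        s.maybeFinishMigration(100)
        fmt.Println(len(s.legacyTable)) // 1 — not caught up yet
        s.replicatedIndex = 120
        s.maybeFinishMigration(100)
        fmt.Println(len(s.legacyTable)) // 0 — zeroed after catch-up
    }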
2020-10-06 13:24:05 -05:00


package consul

import (
    "os"
    "strings"
    "testing"
    "time"

    "github.com/hashicorp/consul/agent/structs"
    tokenStore "github.com/hashicorp/consul/agent/token"
    "github.com/hashicorp/consul/sdk/testutil/retry"
    "github.com/hashicorp/consul/testrpc"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)
func TestLeader_ReplicateIntentions(t *testing.T) {
    // This setup is a little hacky, but should work. We spin up BOTH servers with
    // no intentions and force them to think they're not eligible for intentions
    // config entries yet by overriding serf tags.
    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.Datacenter = "dc1"
        c.ACLDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLMasterToken = "root"
        c.ACLDefaultPolicy = "deny"
        c.Build = "1.6.0"
        c.OverrideInitialSerfTags = func(tags map[string]string) {
            tags["ft_si"] = "0"
        }
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    codec := rpcClient(t, s1)
    defer codec.Close()

    waitForLeaderEstablishment(t, s1)

    retry.Run(t, func(r *retry.R) {
        if s1.DatacenterSupportsIntentionsAsConfigEntries() {
            r.Fatal("server 1 shouldn't activate service-intentions")
        }
    })

    s1.tokens.UpdateAgentToken("root", tokenStore.TokenSourceConfig)

    replicationRules := `acl = "read" service_prefix "" { policy = "read" intentions = "read" } operator = "write" `

    // create some tokens
    replToken1, err := upsertTestTokenWithPolicyRules(codec, "root", "dc1", replicationRules)
    require.NoError(t, err)

    replToken2, err := upsertTestTokenWithPolicyRules(codec, "root", "dc1", replicationRules)
    require.NoError(t, err)

    // dc2 as a secondary DC
    dir2, s2 := testServerWithConfig(t, func(c *Config) {
        c.Datacenter = "dc2"
        c.ACLDatacenter = "dc1"
        c.ACLsEnabled = true
        c.ACLDefaultPolicy = "deny"
        c.ACLTokenReplication = false
        c.Build = "1.6.0"
        c.OverrideInitialSerfTags = func(tags map[string]string) {
            tags["ft_si"] = "0"
        }
    })
    defer os.RemoveAll(dir2)
    defer s2.Shutdown()

    s2.tokens.UpdateAgentToken("root", tokenStore.TokenSourceConfig)

    // start out with one token
    s2.tokens.UpdateReplicationToken(replToken1.SecretID, tokenStore.TokenSourceConfig)

    // Create the WAN link
    joinWAN(t, s2, s1)
    waitForLeaderEstablishment(t, s2)

    retry.Run(t, func(r *retry.R) {
        if s2.DatacenterSupportsIntentionsAsConfigEntries() {
            r.Fatal("server 2 shouldn't activate service-intentions")
        }
    })

    legacyApply := func(s *Server, req *structs.IntentionRequest) error {
        if req.Op != structs.IntentionOpDelete {
            // Do these directly on the inputs so it's corrected for future
            // equality checks.
            req.Intention.CreatedAt = time.Now().UTC()
            req.Intention.UpdatedAt = req.Intention.CreatedAt
            //nolint:staticcheck
            req.Intention.UpdatePrecedence()
            //nolint:staticcheck
            require.NoError(t, req.Intention.Validate())
            //nolint:staticcheck
            req.Intention.SetHash()
        }
        req2 := *req
        req2.Intention = req.Intention.Clone()
        if req.Op != structs.IntentionOpDelete {
            req2.Intention.Hash = req.Intention.Hash // not part of Clone
        }
        resp, err := s.raftApply(structs.IntentionRequestType, req2)
        if err != nil {
            return err
        }
        if respErr, ok := resp.(error); ok {
            return respErr
        }
        return nil
    }

    // Directly insert legacy intentions into raft in dc1.
    id := generateUUID()
    ixn := structs.IntentionRequest{
        Datacenter:   "dc1",
        WriteRequest: structs.WriteRequest{Token: "root"},
        Op:           structs.IntentionOpCreate,
        Intention: &structs.Intention{
            ID:              id,
            SourceNS:        structs.IntentionDefaultNamespace,
            SourceName:      "test",
            DestinationNS:   structs.IntentionDefaultNamespace,
            DestinationName: "test",
            Action:          structs.IntentionActionAllow,
            SourceType:      structs.IntentionSourceConsul,
            Meta:            map[string]string{},
        },
    }
    require.NoError(t, legacyApply(s1, &ixn))

    // Wait for it to get replicated to dc2
    var createdAt time.Time
    retry.Run(t, func(r *retry.R) {
        req := &structs.IntentionQueryRequest{
            Datacenter:   "dc2",
            QueryOptions: structs.QueryOptions{Token: "root"},
            IntentionID:  ixn.Intention.ID,
        }
        var resp structs.IndexedIntentions
        require.NoError(r, s2.RPC("Intention.Get", req, &resp), "ID=%q", ixn.Intention.ID)
        require.Len(r, resp.Intentions, 1)
        actual := resp.Intentions[0]
        createdAt = actual.CreatedAt
    })

    // Sleep a bit so that the UpdatedAt field will definitely be different
    time.Sleep(1 * time.Millisecond)

    // delete underlying acl token being used for replication
    require.NoError(t, deleteTestToken(codec, "root", "dc1", replToken1.AccessorID))

    // switch to the other token
    s2.tokens.UpdateReplicationToken(replToken2.SecretID, tokenStore.TokenSourceConfig)

    // Update the intention in dc1
    ixn.Op = structs.IntentionOpUpdate
    ixn.Intention.ID = id
    ixn.Intention.SourceName = "*"
    require.NoError(t, legacyApply(s1, &ixn))

    // Wait for dc2 to get the update
    var resp structs.IndexedIntentions
    retry.Run(t, func(r *retry.R) {
        req := &structs.IntentionQueryRequest{
            Datacenter:   "dc2",
            QueryOptions: structs.QueryOptions{Token: "root"},
            IntentionID:  ixn.Intention.ID,
        }
        require.NoError(r, s2.RPC("Intention.Get", req, &resp), "ID=%q", ixn.Intention.ID)
        require.Len(r, resp.Intentions, 1)
        require.Equal(r, "*", resp.Intentions[0].SourceName)
    })
    actual := resp.Intentions[0]

    require.Equal(t, createdAt, actual.CreatedAt)
    require.WithinDuration(t, time.Now(), actual.UpdatedAt, 5*time.Second)

    actual.CreateIndex, actual.ModifyIndex = 0, 0
    actual.CreatedAt = ixn.Intention.CreatedAt
    actual.UpdatedAt = ixn.Intention.UpdatedAt
    //nolint:staticcheck
    ixn.Intention.UpdatePrecedence()
    require.Equal(t, ixn.Intention, actual)

    // Delete
    require.NoError(t, legacyApply(s1, &structs.IntentionRequest{
        Datacenter:   "dc1",
        WriteRequest: structs.WriteRequest{Token: "root"},
        Op:           structs.IntentionOpDelete,
        Intention: &structs.Intention{
            ID: ixn.Intention.ID,
        },
    }))

    // Wait for the delete to be replicated
    retry.Run(t, func(r *retry.R) {
        req := &structs.IntentionQueryRequest{
            Datacenter:   "dc2",
            QueryOptions: structs.QueryOptions{Token: "root"},
            IntentionID:  ixn.Intention.ID,
        }
        var resp structs.IndexedIntentions
        err := s2.RPC("Intention.Get", req, &resp)
        require.Error(r, err)
        if !strings.Contains(err.Error(), ErrIntentionNotFound.Error()) {
            r.Fatalf("expected intention not found, got: %v", err)
        }
    })
}
//nolint:staticcheck
func TestLeader_batchLegacyIntentionUpdates(t *testing.T) {
    t.Parallel()

    assert := assert.New(t)
    ixn1 := structs.TestIntention(t)
    ixn1.ID = "ixn1"
    ixn2 := structs.TestIntention(t)
    ixn2.ID = "ixn2"
    ixnLarge := structs.TestIntention(t)
    ixnLarge.ID = "ixnLarge"
    ixnLarge.Description = strings.Repeat("x", maxIntentionTxnSize-1)

    cases := []struct {
        deletes  structs.Intentions
        updates  structs.Intentions
        expected []structs.TxnOps
    }{
        // 1 deletes, 0 updates
        {
            deletes: structs.Intentions{ixn1},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpDelete,
                            Intention: ixn1,
                        },
                    },
                },
            },
        },
        // 0 deletes, 1 updates
        {
            updates: structs.Intentions{ixn1},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixn1,
                        },
                    },
                },
            },
        },
        // 1 deletes, 1 updates
        {
            deletes: structs.Intentions{ixn1},
            updates: structs.Intentions{ixn2},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpDelete,
                            Intention: ixn1,
                        },
                    },
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixn2,
                        },
                    },
                },
            },
        },
        // 1 large intention update
        {
            updates: structs.Intentions{ixnLarge},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixnLarge,
                        },
                    },
                },
            },
        },
        // 2 deletes (w/ a large intention), 1 updates
        {
            deletes: structs.Intentions{ixn1, ixnLarge},
            updates: structs.Intentions{ixn2},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpDelete,
                            Intention: ixn1,
                        },
                    },
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpDelete,
                            Intention: ixnLarge,
                        },
                    },
                },
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixn2,
                        },
                    },
                },
            },
        },
        // 1 deletes, 2 updates (w/ a large intention)
        {
            deletes: structs.Intentions{ixn1},
            updates: structs.Intentions{ixnLarge, ixn2},
            expected: []structs.TxnOps{
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpDelete,
                            Intention: ixn1,
                        },
                    },
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixnLarge,
                        },
                    },
                },
                {
                    &structs.TxnOp{
                        Intention: &structs.TxnIntentionOp{
                            Op:        structs.IntentionOpUpdate,
                            Intention: ixn2,
                        },
                    },
                },
            },
        },
    }

    for _, tc := range cases {
        actual := batchLegacyIntentionUpdates(tc.deletes, tc.updates)
        assert.Equal(tc.expected, actual)
    }
}
func TestLeader_LegacyIntentionMigration(t *testing.T) {
    // This setup is a little hacky, but should work. We spin up a server with
    // no intentions and force it to think it's not eligible for intentions
    // config entries yet by overriding serf tags.
    //
    // Then we directly write legacy intentions into raft. This is mimicking
    // what a service-intentions aware server might do if an older copy of
    // consul was still leader.
    //
    // This lets us generate a snapshot+raft state containing legacy intentions
    // without having to spin up an old version of consul for the test.
    //
    // Then we shut it down and bring up a new copy on that datadir which
    // should then trigger migration code.
    dir1pre, s1pre := testServerWithConfig(t, func(c *Config) {
        c.Datacenter = "dc1"
        c.Build = "1.6.0"
        c.OverrideInitialSerfTags = func(tags map[string]string) {
            tags["ft_si"] = "0"
        }
    })
    defer os.RemoveAll(dir1pre)
    defer s1pre.Shutdown()

    testrpc.WaitForLeader(t, s1pre.RPC, "dc1")

    retry.Run(t, func(r *retry.R) {
        if s1pre.DatacenterSupportsIntentionsAsConfigEntries() {
            r.Fatal("server 1 shouldn't activate service-intentions")
        }
    })

    // Insert a bunch of legacy intentions.
    makeIxn := func(src, dest string, allow bool) *structs.Intention {
        ixn := &structs.Intention{
            ID:              generateUUID(),
            SourceNS:        structs.IntentionDefaultNamespace,
            SourceName:      src,
            DestinationNS:   structs.IntentionDefaultNamespace,
            DestinationName: dest,
            SourceType:      structs.IntentionSourceConsul,
            Meta:            map[string]string{},
        }
        if allow {
            ixn.Action = structs.IntentionActionAllow
        } else {
            ixn.Action = structs.IntentionActionDeny
        }
        //nolint:staticcheck
        ixn.UpdatePrecedence()
        //nolint:staticcheck
        ixn.SetHash()
        return ixn
    }
    ixns := []*structs.Intention{
        makeIxn("api", "db", true),
        makeIxn("web", "db", false),
        makeIxn("*", "web", true),
        makeIxn("*", "api", false),
        makeIxn("intern", "*", false),
        makeIxn("contractor", "*", false),
        makeIxn("*", "*", true),
    }
    for _, ixn := range ixns {
        ixn2 := *ixn
        resp, err := s1pre.raftApply(structs.IntentionRequestType, &structs.IntentionRequest{
            Op:        structs.IntentionOpCreate,
            Intention: &ixn2,
        })
        require.NoError(t, err)
        if respErr, ok := resp.(error); ok {
            t.Fatalf("respErr: %v", respErr)
        }
    }

    mapify := func(ixns []*structs.Intention) map[string]*structs.Intention {
        m := make(map[string]*structs.Intention)
        for _, ixn := range ixns {
            m[ixn.ID] = ixn
        }
        return m
    }

    checkIntentions := func(t *testing.T, srv *Server, legacyOnly bool, expect map[string]*structs.Intention) {
        t.Helper()
        wildMeta := structs.WildcardEnterpriseMeta()
        retry.Run(t, func(r *retry.R) {
            var (
                got structs.Intentions
                err error
            )
            if legacyOnly {
                _, got, err = srv.fsm.State().LegacyIntentions(nil, wildMeta)
            } else {
                _, got, _, err = srv.fsm.State().Intentions(nil, wildMeta)
            }
            require.NoError(r, err)
            gotM := mapify(got)

            assert.Len(r, gotM, len(expect))
            for k, expectV := range expect {
                gotV, ok := gotM[k]
                if !ok {
                    r.Errorf("results are missing key %q", k)
                    continue
                }

                assert.Equal(r, expectV.ID, gotV.ID)
                assert.Equal(r, expectV.SourceNS, gotV.SourceNS)
                assert.Equal(r, expectV.SourceName, gotV.SourceName)
                assert.Equal(r, expectV.DestinationNS, gotV.DestinationNS)
                assert.Equal(r, expectV.DestinationName, gotV.DestinationName)
                assert.Equal(r, expectV.Action, gotV.Action)
                assert.Equal(r, expectV.Meta, gotV.Meta)
                assert.Equal(r, expectV.Precedence, gotV.Precedence)
                assert.Equal(r, expectV.SourceType, gotV.SourceType)
            }
        })
    }

    expectM := mapify(ixns)
    checkIntentions(t, s1pre, false, expectM)
    checkIntentions(t, s1pre, true, expectM)

    // Shutdown s1pre and restart it to trigger migration.
    s1pre.Shutdown()

    dir1, s1 := testServerWithConfig(t, func(c *Config) {
        c.DataDir = s1pre.config.DataDir
        c.Datacenter = "dc1"
        c.NodeName = s1pre.config.NodeName
        c.NodeID = s1pre.config.NodeID
    })
    defer os.RemoveAll(dir1)
    defer s1.Shutdown()

    testrpc.WaitForLeader(t, s1.RPC, "dc1")

    // check that all 7 intentions are present before migration
    checkIntentions(t, s1, false, expectM)

    // Wait until the migration routine is complete.
    retry.Run(t, func(r *retry.R) {
        intentionFormat, err := s1.getSystemMetadata(structs.SystemMetadataIntentionFormatKey)
        require.NoError(r, err)
        if intentionFormat != structs.SystemMetadataIntentionFormatConfigValue {
            r.Fatal("intention migration is not yet complete")
        }
    })

    // check that all 7 intentions are present the general way after migration
    checkIntentions(t, s1, false, expectM)

    // check that no intentions exist in the legacy table
    checkIntentions(t, s1, true, map[string]*structs.Intention{})

    mapifyConfigs := func(entries interface{}) map[structs.ConfigEntryKindName]*structs.ServiceIntentionsConfigEntry {
        m := make(map[structs.ConfigEntryKindName]*structs.ServiceIntentionsConfigEntry)
        switch v := entries.(type) {
        case []*structs.ServiceIntentionsConfigEntry:
            for _, entry := range v {
                kn := structs.NewConfigEntryKindName(entry.Kind, entry.Name, &entry.EnterpriseMeta)
                m[kn] = entry
            }
        case []structs.ConfigEntry:
            for _, entry := range v {
                kn := structs.NewConfigEntryKindName(entry.GetKind(), entry.GetName(), entry.GetEnterpriseMeta())
                m[kn] = entry.(*structs.ServiceIntentionsConfigEntry)
            }
        default:
            t.Fatalf("bad type: %T", entries)
        }
        return m
    }

    // also check config entries
    _, gotConfigs, err := s1.fsm.State().ConfigEntriesByKind(nil, structs.ServiceIntentions, structs.WildcardEnterpriseMeta())
    require.NoError(t, err)
    gotConfigsM := mapifyConfigs(gotConfigs)

    expectConfigs := structs.MigrateIntentions(ixns)
    for _, entry := range expectConfigs {
        require.NoError(t, entry.LegacyNormalize()) // tidy them up the same way the write would
    }
    expectConfigsM := mapifyConfigs(expectConfigs)

    assert.Len(t, gotConfigsM, len(expectConfigsM))
    for kn, expectV := range expectConfigsM {
        gotV, ok := gotConfigsM[kn]
        if !ok {
            t.Errorf("results are missing key %q", kn)
            continue
        }

        // Migrated intentions won't have toplevel Meta.
        assert.Nil(t, gotV.Meta)

        require.Len(t, gotV.Sources, len(expectV.Sources))

        expSrcMap := make(map[string]*structs.SourceIntention)
        for i, src := range expectV.Sources {
            require.NotEmpty(t, src.LegacyID, "index[%d] missing LegacyID", i)

            // Do a shallow copy and strip the times from the copy
            src2 := *src
            src2.LegacyCreateTime = nil
            src2.LegacyUpdateTime = nil
            expSrcMap[src2.LegacyID] = &src2
        }
        for i, got := range gotV.Sources {
            require.NotEmpty(t, got.LegacyID, "index[%d] missing LegacyID", i)

            // Do a shallow copy and strip the times from the copy
            got2 := *got
            got2.LegacyCreateTime = nil
            got2.LegacyUpdateTime = nil

            cmp, ok := expSrcMap[got2.LegacyID]
            require.True(t, ok, "missing %q", got2.LegacyID)
            assert.Equal(t, cmp, &got2, "index[%d]", i)
        }
    }
}