acl: use the presence of a management policy in the state store as a sign that we already migrated to v2 acls (#9505)
This way we only have to wait for the serf barrier to pass once before we can upgrade to v2 acls. Without this patch, every restart needs to re-compute the change, and if a stray older node joins after a migration it could regress the datacenter back to v1 mode, which would be problematic.
commit db62541676
parent b7dc2ae0b5
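To make the mechanism explicit before the diff: a locally stored copy of the builtin global-management policy is treated as proof that the v2 ACL migration already completed, so the server can skip the serf-based detection on later restarts. The sketch below is illustrative only, with hypothetical names (`policyStore`, `upgradeAlreadyCompleted`, `memStore`); the real check is the addition to `canUpgradeToNewACLs` shown in the first diff hunk, which queries the FSM state store via `ACLPolicyGetByID`.

```go
// Minimal, self-contained sketch of the shortcut this patch adds. The types
// and helper names here are hypothetical; in Consul the check lives inside
// (*Server).canUpgradeToNewACLs and uses the FSM state store directly.
package main

import "fmt"

// policyStore stands in for the slice of the state store the check needs.
type policyStore interface {
	// GlobalManagementPolicyExists reports whether a copy of the builtin
	// global-management policy is present locally.
	GlobalManagementPolicyExists() (bool, error)
}

// upgradeAlreadyCompleted returns true when the local store already holds the
// global-management policy, which only happens after a v2 ACL migration has
// finished. On error it returns false, falling back to the slower serf-based
// detection path.
func upgradeAlreadyCompleted(store policyStore) bool {
	exists, err := store.GlobalManagementPolicyExists()
	if err != nil {
		return false
	}
	return exists
}

// memStore is a trivial in-memory stand-in used only for this illustration.
type memStore struct{ hasPolicy bool }

func (m memStore) GlobalManagementPolicyExists() (bool, error) { return m.hasPolicy, nil }

func main() {
	fmt.Println(upgradeAlreadyCompleted(memStore{hasPolicy: true}))  // true: skip the serf barrier wait
	fmt.Println(upgradeAlreadyCompleted(memStore{hasPolicy: false})) // false: fall through to the old checks
}
```

Because ACL policies always replicate to secondary datacenters, this signal is available everywhere the migration has run, which is what lets the secondary in the new test come back up in ENABLED mode without any connectivity to the primary.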
@@ -0,0 +1,3 @@
+```release-note:improvement
+acl: use the presence of a management policy in the state store as a sign that we already migrated to v2 acls
+```
@@ -108,6 +108,16 @@ func (s *Server) canUpgradeToNewACLs(isLeader bool) bool {
 			return false
 		}
 
+	// Check to see if we already upgraded the last time we ran by seeing if we
+	// have a copy of any global management policy stored locally. This should
+	// always be true because policies always replicate.
+	_, mgmtPolicy, err := s.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, structs.DefaultEnterpriseMeta())
+	if err != nil {
+		s.logger.Warn("Failed to get the builtin global-management policy to check for a completed ACL upgrade; skipping this optimization", "error", err)
+	} else if mgmtPolicy != nil {
+		return true
+	}
+
 	if !s.InACLDatacenter() {
 		foundServers, mode, _ := ServersGetACLMode(s, "", s.config.ACLDatacenter)
 		if mode != structs.ACLModeEnabled || !foundServers {
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"github.com/hashicorp/consul/agent/structs"
+	tokenStore "github.com/hashicorp/consul/agent/token"
 	"github.com/hashicorp/consul/api"
 	"github.com/hashicorp/consul/sdk/testutil"
 	"github.com/hashicorp/consul/sdk/testutil/retry"
@@ -1272,6 +1273,99 @@ func TestLeader_ACLUpgrade(t *testing.T) {
 		})
 	}
 }
 
+func TestLeader_ACLUpgrade_IsStickyEvenIfSerfTagsRegress(t *testing.T) {
+	if testing.Short() {
+		t.Skip("too slow for testing.Short")
+	}
+
+	t.Parallel()
+
+	// We test this by having two datacenters with one server each. They
+	// initially come up and complete the migration, then we power them both
+	// off. We leave the primary off permanently, and then we stand up the
+	// secondary. Hopefully it should transition to ENABLED instead of being
+	// stuck in LEGACY.
+
+	dir1, s1 := testServerWithConfig(t, func(c *Config) {
+		c.Datacenter = "dc1"
+		c.ACLDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLMasterToken = "root"
+	})
+	defer os.RemoveAll(dir1)
+	defer s1.Shutdown()
+	codec := rpcClient(t, s1)
+	defer codec.Close()
+
+	waitForLeaderEstablishment(t, s1)
+
+	dir2, s2 := testServerWithConfig(t, func(c *Config) {
+		c.Datacenter = "dc2"
+		c.ACLDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLTokenReplication = false
+		c.ACLReplicationRate = 100
+		c.ACLReplicationBurst = 100
+		c.ACLReplicationApplyLimit = 1000000
+	})
+	defer os.RemoveAll(dir2)
+	defer s2.Shutdown()
+	codec2 := rpcClient(t, s2)
+	defer codec2.Close()
+
+	s2.tokens.UpdateReplicationToken("root", tokenStore.TokenSourceConfig)
+
+	testrpc.WaitForLeader(t, s2.RPC, "dc2")
+	waitForLeaderEstablishment(t, s2)
+
+	// Create the WAN link
+	joinWAN(t, s2, s1)
+	waitForLeaderEstablishment(t, s1)
+	waitForLeaderEstablishment(t, s2)
+
+	waitForNewACLs(t, s1)
+	waitForNewACLs(t, s2)
+	waitForNewACLReplication(t, s2, structs.ACLReplicatePolicies, 1, 0, 0)
+
+	// Everybody has the management policy.
+	retry.Run(t, func(r *retry.R) {
+		_, policy1, err := s1.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, structs.DefaultEnterpriseMeta())
+		require.NoError(r, err)
+		require.NotNil(r, policy1)
+
+		_, policy2, err := s2.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, structs.DefaultEnterpriseMeta())
+		require.NoError(r, err)
+		require.NotNil(r, policy2)
+	})
+
+	// Shutdown s1 and s2.
+	s1.Shutdown()
+	s2.Shutdown()
+
+	// Restart just s2
+
+	dir2new, s2new := testServerWithConfig(t, func(c *Config) {
+		c.Datacenter = "dc2"
+		c.ACLDatacenter = "dc1"
+		c.ACLsEnabled = true
+		c.ACLTokenReplication = false
+		c.ACLReplicationRate = 100
+		c.ACLReplicationBurst = 100
+		c.ACLReplicationApplyLimit = 1000000
+
+		c.DataDir = s2.config.DataDir
+		c.NodeName = s2.config.NodeName
+		c.NodeID = s2.config.NodeID
+	})
+	defer os.RemoveAll(dir2new)
+	defer s2new.Shutdown()
+
+	waitForLeaderEstablishment(t, s2new)
+
+	// It should be able to transition without connectivity to the primary.
+	waitForNewACLs(t, s2new)
+}
+
 func TestLeader_ConfigEntryBootstrap(t *testing.T) {
 	if testing.Short() {
 		t.Skip("too slow for testing.Short")