From da9c91fd3df6d88f778c84e1bffa58b80750d7f6 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Thu, 28 Jun 2018 09:06:14 +0200 Subject: [PATCH 1/8] Only send one single ACL cache refresh across network when TTL is over It will allow the following: * when connectivity is limited (saturated linnks between DCs), only one single request to refresh an ACL will be sent to ACL master DC instead of statcking ACL refresh queries * when extend-cache is used for ACL, do not wait for result, but refresh the ACL asynchronously, so no delay is not impacting slave DC * When extend-cache is not used, keep the existing blocking mechanism, but only send a single refresh request. This will fix https://github.com/hashicorp/consul/issues/3524 --- agent/consul/acl.go | 79 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/agent/consul/acl.go b/agent/consul/acl.go index ce3282b40..0bf1dbb4c 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -4,6 +4,7 @@ import ( "fmt" "log" "os" + "sync" "time" "github.com/armon/go-metrics" @@ -116,6 +117,9 @@ type aclCache struct { // local is a function used to look for an ACL locally if replication is // enabled. This will be nil if replication isn't enabled. local acl.FaultFunc + + fetchMutex sync.RWMutex + fetchMap map[string][]chan (RemoteACLResult) } // newACLCache returns a new non-authoritative cache for ACLs. This is used for @@ -146,6 +150,11 @@ func newACLCache(conf *Config, logger *log.Logger, rpc rpcFn, local acl.FaultFun return cache, nil } +type RemoteACLResult struct { + result acl.ACL + err error +} + // lookupACL is used when we are non-authoritative, and need to resolve an ACL. func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) { // Check the cache for the ACL. @@ -161,8 +170,22 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) { return cached.ACL, nil } metrics.IncrCounter([]string{"acl", "cache_miss"}, 1) + res := c.lookupACLRemote(id, authDC, cached) + return res.result, res.err +} - // Attempt to refresh the policy from the ACL datacenter via an RPC. +func (c *aclCache) fireResult(id string, theACL acl.ACL, err error) { + c.fetchMutex.Lock() + channels := c.fetchMap[id] + delete(c.fetchMap, id) + c.fetchMutex.Unlock() + for _, cx := range channels { + cx <- RemoteACLResult{theACL, err} + close(cx) + } +} + +func (c *aclCache) loadACLInChan(id, authDC string, cached *aclCacheEntry) { args := structs.ACLPolicyRequest{ Datacenter: authDC, ACL: id, @@ -173,13 +196,21 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) { var reply structs.ACLPolicy err := c.rpc("ACL.GetPolicy", &args, &reply) if err == nil { - return c.useACLPolicy(id, authDC, cached, &reply) + theACL, theError := c.useACLPolicy(id, authDC, cached, &reply) + if cached != nil && theACL != nil { + cached.ACL = theACL + cached.ETag = reply.ETag + cached.Expires = time.Now().Add(c.config.ACLTTL) + } + c.fireResult(id, theACL, theError) + return } // Check for not-found, which will cause us to bail immediately. For any // other error we report it in the logs but can continue. if acl.IsErrNotFound(err) { - return nil, acl.ErrNotFound + c.fireResult(id, nil, acl.ErrNotFound) + return } c.logger.Printf("[ERR] consul.acl: Failed to get policy from ACL datacenter: %v", err) @@ -227,24 +258,58 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) { reply.TTL = c.config.ACLTTL reply.Parent = parent reply.Policy = policy - return c.useACLPolicy(id, authDC, cached, &reply) + theACL, theError := c.useACLPolicy(id, authDC, cached, &reply) + if cached != nil && theACL != nil { + cached.ACL = theACL + cached.ETag = reply.ETag + cached.Expires = time.Now().Add(c.config.ACLTTL) + } + c.fireResult(id, theACL, theError) + return } ACL_DOWN: // Unable to refresh, apply the down policy. switch c.config.ACLDownPolicy { case "allow": - return acl.AllowAll(), nil + c.fireResult(id, acl.AllowAll(), nil) + return case "extend-cache": if cached != nil { - return cached.ACL, nil + c.fireResult(id, cached.ACL, nil) + return } fallthrough default: - return acl.DenyAll(), nil + c.fireResult(id, acl.DenyAll(), nil) + return } } +func (c *aclCache) lookupACLRemote(id, authDC string, cached *aclCacheEntry) RemoteACLResult { + // Attempt to refresh the policy from the ACL datacenter via an RPC. + myChan := make(chan RemoteACLResult) + mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "extend-cache" + c.fetchMutex.Lock() + clients, ok := c.fetchMap[id] + if !ok { + clients = make([]chan RemoteACLResult, 16) + } + if mustWaitForResult { + c.fetchMap[id] = append(clients, myChan) + } + c.fetchMutex.Unlock() + + if !ok { + go c.loadACLInChan(id, authDC, cached) + } + if !mustWaitForResult { + return RemoteACLResult{cached.ACL, nil} + } + res := <-myChan + return res +} + // useACLPolicy handles an ACLPolicy response func (c *aclCache) useACLPolicy(id, authDC string, cached *aclCacheEntry, p *structs.ACLPolicy) (acl.ACL, error) { // Check if we can used the cached policy From 8ea69290ac9e02dc1c6a679817b4cd606f186789 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 29 Jun 2018 10:59:36 +0200 Subject: [PATCH 2/8] Updated ACL guide --- website/source/docs/guides/acl.html.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index a78f60b55..f6aad52bc 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -1061,6 +1061,10 @@ and the [`acl_down_policy`](/docs/agent/options.html#acl_down_policy) is set to "extend-cache", tokens will be resolved during the outage using the replicated set of ACLs. An [ACL replication status](/api/acl.html#acl_replication_status) endpoint is available to monitor the health of the replication process. +Also note that in recent versions of Consul (greater than 1.2.0), using +`acl_down_policy = "extend-cache"` refreshes token asynchronously when an ACL is +already cached and is expired. It allows to avoid having issues when connectivity with +the authoritative is not completely broken, but very slow. Locally-resolved ACLs will be cached using the [`acl_ttl`](/docs/agent/options.html#acl_ttl) setting of the non-authoritative datacenter, so these entries may persist in the From 382bec0897c615712da996c448b5759e8c2ee942 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Sun, 1 Jul 2018 12:50:53 +0200 Subject: [PATCH 3/8] Added async-cache with similar behaviour as extend-cache but asynchronously --- agent/acl.go | 1 + agent/acl_test.go | 133 ++++++++++++++++++++------------------- agent/config/runtime.go | 4 +- agent/consul/acl.go | 14 +++-- agent/consul/acl_test.go | 132 +++++++++++++++++++------------------- agent/consul/config.go | 6 +- 6 files changed, 153 insertions(+), 137 deletions(-) diff --git a/agent/acl.go b/agent/acl.go index e266feafa..1143da97c 100644 --- a/agent/acl.go +++ b/agent/acl.go @@ -104,6 +104,7 @@ func newACLManager(config *config.RuntimeConfig) (*aclManager, error) { case "deny": down = acl.DenyAll() case "extend-cache": + case "async-cache": // Leave the down policy as nil to signal this. default: return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy) diff --git a/agent/acl_test.go b/agent/acl_test.go index 8b1626998..0932dc525 100644 --- a/agent/acl_test.go +++ b/agent/acl_test.go @@ -274,79 +274,82 @@ func TestACL_Down_Allow(t *testing.T) { func TestACL_Down_Extend(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), TestACLConfig()+` - acl_down_policy = "extend-cache" + aclExtendPolicies := []string{"extend-cache", "async-cache"} + for _, aclDownPolicy := range aclExtendPolicies { + a := NewTestAgent(t.Name(), TestACLConfig()+` + acl_down_policy = "`+aclDownPolicy+`" acl_enforce_version_8 = true `) - defer a.Shutdown() + defer a.Shutdown() - m := MockServer{ - // Populate the cache for one of the tokens. - getPolicyFn: func(req *structs.ACLPolicyRequest, reply *structs.ACLPolicy) error { - *reply = structs.ACLPolicy{ - Parent: "allow", - Policy: &rawacl.Policy{ - Agents: []*rawacl.AgentPolicy{ - &rawacl.AgentPolicy{ - Node: a.config.NodeName, - Policy: "read", + m := MockServer{ + // Populate the cache for one of the tokens. + getPolicyFn: func(req *structs.ACLPolicyRequest, reply *structs.ACLPolicy) error { + *reply = structs.ACLPolicy{ + Parent: "allow", + Policy: &rawacl.Policy{ + Agents: []*rawacl.AgentPolicy{ + &rawacl.AgentPolicy{ + Node: a.config.NodeName, + Policy: "read", + }, }, }, - }, - } - return nil - }, - } - if err := a.registerEndpoint("ACL", &m); err != nil { - t.Fatalf("err: %v", err) - } + } + return nil + }, + } + if err := a.registerEndpoint("ACL", &m); err != nil { + t.Fatalf("err: %v", err) + } - acl, err := a.resolveToken("yep") - if err != nil { - t.Fatalf("err: %v", err) - } - if acl == nil { - t.Fatalf("should not be nil") - } - if !acl.AgentRead(a.config.NodeName) { - t.Fatalf("should allow") - } - if acl.AgentWrite(a.config.NodeName) { - t.Fatalf("should deny") - } + acl, err := a.resolveToken("yep") + if err != nil { + t.Fatalf("err: %v", err) + } + if acl == nil { + t.Fatalf("should not be nil") + } + if !acl.AgentRead(a.config.NodeName) { + t.Fatalf("should allow") + } + if acl.AgentWrite(a.config.NodeName) { + t.Fatalf("should deny") + } - // Now take down ACLs and make sure a new token fails to resolve. - m.getPolicyFn = func(*structs.ACLPolicyRequest, *structs.ACLPolicy) error { - return fmt.Errorf("ACLs are broken") - } - acl, err = a.resolveToken("nope") - if err != nil { - t.Fatalf("err: %v", err) - } - if acl == nil { - t.Fatalf("should not be nil") - } - if acl.AgentRead(a.config.NodeName) { - t.Fatalf("should deny") - } - if acl.AgentWrite(a.config.NodeName) { - t.Fatalf("should deny") - } + // Now take down ACLs and make sure a new token fails to resolve. + m.getPolicyFn = func(*structs.ACLPolicyRequest, *structs.ACLPolicy) error { + return fmt.Errorf("ACLs are broken") + } + acl, err = a.resolveToken("nope") + if err != nil { + t.Fatalf("err: %v", err) + } + if acl == nil { + t.Fatalf("should not be nil") + } + if acl.AgentRead(a.config.NodeName) { + t.Fatalf("should deny") + } + if acl.AgentWrite(a.config.NodeName) { + t.Fatalf("should deny") + } - // Read the token from the cache while ACLs are broken, which should - // extend. - acl, err = a.resolveToken("yep") - if err != nil { - t.Fatalf("err: %v", err) - } - if acl == nil { - t.Fatalf("should not be nil") - } - if !acl.AgentRead(a.config.NodeName) { - t.Fatalf("should allow") - } - if acl.AgentWrite(a.config.NodeName) { - t.Fatalf("should deny") + // Read the token from the cache while ACLs are broken, which should + // extend. + acl, err = a.resolveToken("yep") + if err != nil { + t.Fatalf("err: %v", err) + } + if acl == nil { + t.Fatalf("should not be nil") + } + if !acl.AgentRead(a.config.NodeName) { + t.Fatalf("should allow") + } + if acl.AgentWrite(a.config.NodeName) { + t.Fatalf("should deny") + } } } diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 9674d14d5..05ad8796c 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -94,8 +94,10 @@ type RuntimeConfig struct { // ACL's to be used to service requests. This // is the default. If the ACL is not in the cache, // this acts like deny. + // * async-cache - Same behaviour as extend-cache, but perform ACL + // Lookups asynchronously when cache TTL is expired. // - // hcl: acl_down_policy = ("allow"|"deny"|"extend-cache") + // hcl: acl_down_policy = ("allow"|"deny"|"extend-cache"|"async-cache") ACLDownPolicy string // ACLEnforceVersion8 is used to gate a set of ACL policy features that diff --git a/agent/consul/acl.go b/agent/consul/acl.go index 0bf1dbb4c..23bdbff35 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -146,10 +146,12 @@ func newACLCache(conf *Config, logger *log.Logger, rpc rpcFn, local acl.FaultFun if err != nil { return nil, fmt.Errorf("Failed to create ACL policy cache: %v", err) } + cache.fetchMap = make(map[string][]chan (RemoteACLResult)) return cache, nil } +// Result Type returned when fetching Remote ACLs asynchronously type RemoteACLResult struct { result acl.ACL err error @@ -179,8 +181,9 @@ func (c *aclCache) fireResult(id string, theACL acl.ACL, err error) { channels := c.fetchMap[id] delete(c.fetchMap, id) c.fetchMutex.Unlock() + aclResult := RemoteACLResult{theACL, err} for _, cx := range channels { - cx <- RemoteACLResult{theACL, err} + cx <- aclResult close(cx) } } @@ -231,7 +234,7 @@ func (c *aclCache) loadACLInChan(id, authDC string, cached *aclCacheEntry) { // local ACL fault function is registered to query replicated ACL data, // and the user's policy allows it, we will try locally before we give // up. - if c.local != nil && c.config.ACLDownPolicy == "extend-cache" { + if c.local != nil && (c.config.ACLDownPolicy == "extend-cache" || c.config.ACLDownPolicy == "async-cache") { parent, rules, err := c.local(id) if err != nil { // We don't make an exception here for ACLs that aren't @@ -274,6 +277,7 @@ ACL_DOWN: case "allow": c.fireResult(id, acl.AllowAll(), nil) return + case "async-cache": case "extend-cache": if cached != nil { c.fireResult(id, cached.ACL, nil) @@ -289,11 +293,11 @@ ACL_DOWN: func (c *aclCache) lookupACLRemote(id, authDC string, cached *aclCacheEntry) RemoteACLResult { // Attempt to refresh the policy from the ACL datacenter via an RPC. myChan := make(chan RemoteACLResult) - mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "extend-cache" + mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "async-cache" c.fetchMutex.Lock() clients, ok := c.fetchMap[id] - if !ok { - clients = make([]chan RemoteACLResult, 16) + if !ok || clients == nil { + clients = make([]chan RemoteACLResult, 0) } if mustWaitForResult { c.fetchMap[id] = append(clients, myChan) diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go index ace1284a8..fd08b54ab 100644 --- a/agent/consul/acl_test.go +++ b/agent/consul/acl_test.go @@ -508,78 +508,82 @@ func TestACL_DownPolicy_Allow(t *testing.T) { func TestACL_DownPolicy_ExtendCache(t *testing.T) { t.Parallel() - dir1, s1 := testServerWithConfig(t, func(c *Config) { - c.ACLDatacenter = "dc1" - c.ACLTTL = 0 - c.ACLDownPolicy = "extend-cache" - c.ACLMasterToken = "root" - }) - defer os.RemoveAll(dir1) - defer s1.Shutdown() - client := rpcClient(t, s1) - defer client.Close() + aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache" - dir2, s2 := testServerWithConfig(t, func(c *Config) { - c.ACLDatacenter = "dc1" // Enable ACLs! - c.ACLTTL = 0 - c.ACLDownPolicy = "extend-cache" - c.Bootstrap = false // Disable bootstrap - }) - defer os.RemoveAll(dir2) - defer s2.Shutdown() + for _, aclDownPolicy := range aclExtendPolicies { + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.ACLDatacenter = "dc1" + c.ACLTTL = 0 + c.ACLDownPolicy = aclDownPolicy + c.ACLMasterToken = "root" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + client := rpcClient(t, s1) + defer client.Close() - // Try to join - joinLAN(t, s2, s1) - retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.ACLDatacenter = "dc1" // Enable ACLs! + c.ACLTTL = 0 + c.ACLDownPolicy = aclDownPolicy + c.Bootstrap = false // Disable bootstrap + }) + defer os.RemoveAll(dir2) + defer s2.Shutdown() - testrpc.WaitForLeader(t, s1.RPC, "dc1") + // Try to join + joinLAN(t, s2, s1) + retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) }) - // Create a new token - arg := structs.ACLRequest{ - Datacenter: "dc1", - Op: structs.ACLSet, - ACL: structs.ACL{ - Name: "User token", - Type: structs.ACLTypeClient, - Rules: testACLPolicy, - }, - WriteRequest: structs.WriteRequest{Token: "root"}, - } - var id string - if err := s1.RPC("ACL.Apply", &arg, &id); err != nil { - t.Fatalf("err: %v", err) - } + testrpc.WaitForLeader(t, s1.RPC, "dc1") - // find the non-authoritative server - var nonAuth *Server - var auth *Server - if !s1.IsLeader() { - nonAuth = s1 - auth = s2 - } else { - nonAuth = s2 - auth = s1 - } + // Create a new token + arg := structs.ACLRequest{ + Datacenter: "dc1", + Op: structs.ACLSet, + ACL: structs.ACL{ + Name: "User token", + Type: structs.ACLTypeClient, + Rules: testACLPolicy, + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var id string + if err := s1.RPC("ACL.Apply", &arg, &id); err != nil { + t.Fatalf("err: %v", err) + } - // Warm the caches - aclR, err := nonAuth.resolveToken(id) - if err != nil { - t.Fatalf("err: %v", err) - } - if aclR == nil { - t.Fatalf("bad acl: %#v", aclR) - } + // find the non-authoritative server + var nonAuth *Server + var auth *Server + if !s1.IsLeader() { + nonAuth = s1 + auth = s2 + } else { + nonAuth = s2 + auth = s1 + } - // Kill the authoritative server - auth.Shutdown() + // Warm the caches + aclR, err := nonAuth.resolveToken(id) + if err != nil { + t.Fatalf("err: %v", err) + } + if aclR == nil { + t.Fatalf("bad acl: %#v", aclR) + } - // Token should resolve into cached copy - aclR2, err := nonAuth.resolveToken(id) - if err != nil { - t.Fatalf("err: %v", err) - } - if aclR2 != aclR { - t.Fatalf("bad acl: %#v", aclR) + // Kill the authoritative server + auth.Shutdown() + + // Token should resolve into cached copy + aclR2, err := nonAuth.resolveToken(id) + if err != nil { + t.Fatalf("err: %v", err) + } + if aclR2 != aclR { + t.Fatalf("bad acl: %#v", aclR) + } } } diff --git a/agent/consul/config.go b/agent/consul/config.go index 29a524531..570a07c16 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -235,8 +235,9 @@ type Config struct { // ACLDownPolicy controls the behavior of ACLs if the ACLDatacenter // cannot be contacted. It can be either "deny" to deny all requests, - // or "extend-cache" which ignores the ACLCacheInterval and uses - // cached policies. If a policy is not in the cache, it acts like deny. + // "extend-cache" or "async-cache" which ignores the ACLCacheInterval and + // uses cached policies. + // If a policy is not in the cache, it acts like deny. // "allow" can be used to allow all requests. This is not recommended. ACLDownPolicy string @@ -378,6 +379,7 @@ func (c *Config) CheckACL() error { switch c.ACLDownPolicy { case "allow": case "deny": + case "async-cache": case "extend-cache": default: return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy) From 6dfbbf135006313d6986ac71edbb110da6edca82 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Sun, 1 Jul 2018 20:00:20 +0200 Subject: [PATCH 4/8] Updated documentation and adding more test case for async-cache --- agent/consul/acl_test.go | 198 +++++++++++----------- website/source/docs/agent/options.html.md | 6 +- website/source/docs/guides/acl.html.md | 7 +- 3 files changed, 109 insertions(+), 102 deletions(-) diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go index fd08b54ab..45048f80a 100644 --- a/agent/consul/acl_test.go +++ b/agent/consul/acl_test.go @@ -589,116 +589,120 @@ func TestACL_DownPolicy_ExtendCache(t *testing.T) { func TestACL_Replication(t *testing.T) { t.Parallel() - dir1, s1 := testServerWithConfig(t, func(c *Config) { - c.ACLDatacenter = "dc1" - c.ACLMasterToken = "root" - }) - defer os.RemoveAll(dir1) - defer s1.Shutdown() - client := rpcClient(t, s1) - defer client.Close() + aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache" - dir2, s2 := testServerWithConfig(t, func(c *Config) { - c.Datacenter = "dc2" - c.ACLDatacenter = "dc1" - c.ACLDefaultPolicy = "deny" - c.ACLDownPolicy = "extend-cache" - c.EnableACLReplication = true - c.ACLReplicationInterval = 10 * time.Millisecond - c.ACLReplicationApplyLimit = 1000000 - }) - s2.tokens.UpdateACLReplicationToken("root") - defer os.RemoveAll(dir2) - defer s2.Shutdown() + for _, aclDownPolicy := range aclExtendPolicies { + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.ACLDatacenter = "dc1" + c.ACLMasterToken = "root" + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + client := rpcClient(t, s1) + defer client.Close() - dir3, s3 := testServerWithConfig(t, func(c *Config) { - c.Datacenter = "dc3" - c.ACLDatacenter = "dc1" - c.ACLDownPolicy = "deny" - c.EnableACLReplication = true - c.ACLReplicationInterval = 10 * time.Millisecond - c.ACLReplicationApplyLimit = 1000000 - }) - s3.tokens.UpdateACLReplicationToken("root") - defer os.RemoveAll(dir3) - defer s3.Shutdown() + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc2" + c.ACLDatacenter = "dc1" + c.ACLDefaultPolicy = "deny" + c.ACLDownPolicy = aclDownPolicy + c.EnableACLReplication = true + c.ACLReplicationInterval = 10 * time.Millisecond + c.ACLReplicationApplyLimit = 1000000 + }) + s2.tokens.UpdateACLReplicationToken("root") + defer os.RemoveAll(dir2) + defer s2.Shutdown() - // Try to join. - joinWAN(t, s2, s1) - joinWAN(t, s3, s1) - testrpc.WaitForLeader(t, s1.RPC, "dc1") - testrpc.WaitForLeader(t, s1.RPC, "dc2") - testrpc.WaitForLeader(t, s1.RPC, "dc3") + dir3, s3 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc3" + c.ACLDatacenter = "dc1" + c.ACLDownPolicy = "deny" + c.EnableACLReplication = true + c.ACLReplicationInterval = 10 * time.Millisecond + c.ACLReplicationApplyLimit = 1000000 + }) + s3.tokens.UpdateACLReplicationToken("root") + defer os.RemoveAll(dir3) + defer s3.Shutdown() - // Create a new token. - arg := structs.ACLRequest{ - Datacenter: "dc1", - Op: structs.ACLSet, - ACL: structs.ACL{ - Name: "User token", - Type: structs.ACLTypeClient, - Rules: testACLPolicy, - }, - WriteRequest: structs.WriteRequest{Token: "root"}, - } - var id string - if err := s1.RPC("ACL.Apply", &arg, &id); err != nil { - t.Fatalf("err: %v", err) - } - // Wait for replication to occur. - retry.Run(t, func(r *retry.R) { - _, acl, err := s2.fsm.State().ACLGet(nil, id) + // Try to join. + joinWAN(t, s2, s1) + joinWAN(t, s3, s1) + testrpc.WaitForLeader(t, s1.RPC, "dc1") + testrpc.WaitForLeader(t, s1.RPC, "dc2") + testrpc.WaitForLeader(t, s1.RPC, "dc3") + + // Create a new token. + arg := structs.ACLRequest{ + Datacenter: "dc1", + Op: structs.ACLSet, + ACL: structs.ACL{ + Name: "User token", + Type: structs.ACLTypeClient, + Rules: testACLPolicy, + }, + WriteRequest: structs.WriteRequest{Token: "root"}, + } + var id string + if err := s1.RPC("ACL.Apply", &arg, &id); err != nil { + t.Fatalf("err: %v", err) + } + // Wait for replication to occur. + retry.Run(t, func(r *retry.R) { + _, acl, err := s2.fsm.State().ACLGet(nil, id) + if err != nil { + r.Fatal(err) + } + if acl == nil { + r.Fatal(nil) + } + _, acl, err = s3.fsm.State().ACLGet(nil, id) + if err != nil { + r.Fatal(err) + } + if acl == nil { + r.Fatal(nil) + } + }) + + // Kill the ACL datacenter. + s1.Shutdown() + + // Token should resolve on s2, which has replication + extend-cache. + acl, err := s2.resolveToken(id) if err != nil { - r.Fatal(err) + t.Fatalf("err: %v", err) } if acl == nil { - r.Fatal(nil) + t.Fatalf("missing acl") } - _, acl, err = s3.fsm.State().ACLGet(nil, id) + + // Check the policy + if acl.KeyRead("bar") { + t.Fatalf("unexpected read") + } + if !acl.KeyRead("foo/test") { + t.Fatalf("unexpected failed read") + } + + // Although s3 has replication, and we verified that the ACL is there, + // it can not be used because of the down policy. + acl, err = s3.resolveToken(id) if err != nil { - r.Fatal(err) + t.Fatalf("err: %v", err) } if acl == nil { - r.Fatal(nil) + t.Fatalf("missing acl") } - }) - // Kill the ACL datacenter. - s1.Shutdown() - - // Token should resolve on s2, which has replication + extend-cache. - acl, err := s2.resolveToken(id) - if err != nil { - t.Fatalf("err: %v", err) - } - if acl == nil { - t.Fatalf("missing acl") - } - - // Check the policy - if acl.KeyRead("bar") { - t.Fatalf("unexpected read") - } - if !acl.KeyRead("foo/test") { - t.Fatalf("unexpected failed read") - } - - // Although s3 has replication, and we verified that the ACL is there, - // it can not be used because of the down policy. - acl, err = s3.resolveToken(id) - if err != nil { - t.Fatalf("err: %v", err) - } - if acl == nil { - t.Fatalf("missing acl") - } - - // Check the policy. - if acl.KeyRead("bar") { - t.Fatalf("unexpected read") - } - if acl.KeyRead("foo/test") { - t.Fatalf("unexpected read") + // Check the policy. + if acl.KeyRead("bar") { + t.Fatalf("unexpected read") + } + if acl.KeyRead("foo/test") { + t.Fatalf("unexpected read") + } } } diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 4e2ecb303..65402c2ca 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -496,11 +496,13 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass to enable ACL support. * `acl_down_policy` - Either - "allow", "deny" or "extend-cache"; "extend-cache" is the default. In the case that the + "allow", "deny", "extend-cache" or "async-cache"; "extend-cache" is the default. In the case that the policy for a token cannot be read from the [`acl_datacenter`](#acl_datacenter) or leader node, the down policy is applied. In "allow" mode, all actions are permitted, "deny" restricts all operations, and "extend-cache" allows any cached ACLs to be used, ignoring their TTL - values. If a non-cached ACL is used, "extend-cache" acts like "deny". + values. If a non-cached ACL is used, "extend-cache" acts like "deny". "async-cache" acts the same + way as "extend-cache" but performs updates asynchronously when ACL is present but its TTL is + expired. * `acl_agent_master_token` - Used to access agent endpoints that require agent read diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index f6aad52bc..c25666ab9 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -1062,9 +1062,10 @@ is set to "extend-cache", tokens will be resolved during the outage using the replicated set of ACLs. An [ACL replication status](/api/acl.html#acl_replication_status) endpoint is available to monitor the health of the replication process. Also note that in recent versions of Consul (greater than 1.2.0), using -`acl_down_policy = "extend-cache"` refreshes token asynchronously when an ACL is -already cached and is expired. It allows to avoid having issues when connectivity with -the authoritative is not completely broken, but very slow. +`acl_down_policy = "async-cache"` refreshes token asynchronously when an ACL is +already cached and is expired while similar semantics than "extend-cache". +It allows to avoid having issues when connectivity with the authoritative is not completely +broken, but very slow. Locally-resolved ACLs will be cached using the [`acl_ttl`](/docs/agent/options.html#acl_ttl) setting of the non-authoritative datacenter, so these entries may persist in the From d8a6571683de666c094dc563dc708d5bfc5a19af Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Sun, 1 Jul 2018 23:37:40 +0200 Subject: [PATCH 5/8] Improve doc for async-cache --- website/source/docs/agent/options.html.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 65402c2ca..06f071dd0 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -500,9 +500,10 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass policy for a token cannot be read from the [`acl_datacenter`](#acl_datacenter) or leader node, the down policy is applied. In "allow" mode, all actions are permitted, "deny" restricts all operations, and "extend-cache" allows any cached ACLs to be used, ignoring their TTL - values. If a non-cached ACL is used, "extend-cache" acts like "deny". "async-cache" acts the same - way as "extend-cache" but performs updates asynchronously when ACL is present but its TTL is - expired. + values. If a non-cached ACL is used, "extend-cache" acts like "deny". + The value "async-cache" acts the same way as "extend-cache" but performs updates + asynchronously when ACL is present but its TTL is expired, thus, if latency is bad between + ACL authoritative and other datacenters, latency of operations is not impacted. * `acl_agent_master_token` - Used to access agent endpoints that require agent read From f675575b66e68befcc77d32327f5fa110523af06 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Mon, 2 Jul 2018 00:24:37 +0200 Subject: [PATCH 6/8] Trying to fix Travis tests --- GNUmakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GNUmakefile b/GNUmakefile index 5df83416b..d510c0fe6 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -158,7 +158,7 @@ test: other-consul dev-build vet @# hide it from travis as it exceeds their log limits and causes job to be @# terminated (over 4MB and over 10k lines in the UI). We need to output @# _something_ to stop them terminating us due to inactivity... - { go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 5m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul' + { go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 7m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul' @echo "Exit code: $$(cat exit-code)" >> test.log @# This prints all the race report between ====== lines @awk '/^WARNING: DATA RACE/ {do_print=1; print "=================="} do_print==1 {print} /^={10,}/ {do_print=0}' test.log || true From 95a0ab9f999b45f86ce347a4d830e43bfee68a62 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Mon, 2 Jul 2018 17:39:34 +0200 Subject: [PATCH 7/8] Updated swith case to use same branch for async-cache and extend-cache --- agent/acl.go | 3 +-- agent/consul/acl.go | 3 +-- agent/consul/config.go | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/agent/acl.go b/agent/acl.go index 1143da97c..5c4deb7cc 100644 --- a/agent/acl.go +++ b/agent/acl.go @@ -103,8 +103,7 @@ func newACLManager(config *config.RuntimeConfig) (*aclManager, error) { down = acl.AllowAll() case "deny": down = acl.DenyAll() - case "extend-cache": - case "async-cache": + case "async-cache", "extend-cache": // Leave the down policy as nil to signal this. default: return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy) diff --git a/agent/consul/acl.go b/agent/consul/acl.go index 23bdbff35..4dbfe2b39 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -277,8 +277,7 @@ ACL_DOWN: case "allow": c.fireResult(id, acl.AllowAll(), nil) return - case "async-cache": - case "extend-cache": + case "async-cache", "extend-cache": if cached != nil { c.fireResult(id, cached.ACL, nil) return diff --git a/agent/consul/config.go b/agent/consul/config.go index 570a07c16..b878d9a99 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -379,8 +379,7 @@ func (c *Config) CheckACL() error { switch c.ACLDownPolicy { case "allow": case "deny": - case "async-cache": - case "extend-cache": + case "async-cache", "extend-cache": default: return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy) } From 135ac85b214f97048d82d5a9f22c0dc8324b8f6d Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Sat, 7 Jul 2018 14:03:34 +0200 Subject: [PATCH 8/8] Fixed indentation in test --- agent/acl_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agent/acl_test.go b/agent/acl_test.go index 0932dc525..6b912646e 100644 --- a/agent/acl_test.go +++ b/agent/acl_test.go @@ -277,9 +277,9 @@ func TestACL_Down_Extend(t *testing.T) { aclExtendPolicies := []string{"extend-cache", "async-cache"} for _, aclDownPolicy := range aclExtendPolicies { a := NewTestAgent(t.Name(), TestACLConfig()+` - acl_down_policy = "`+aclDownPolicy+`" - acl_enforce_version_8 = true - `) + acl_down_policy = "`+aclDownPolicy+`" + acl_enforce_version_8 = true + `) defer a.Shutdown() m := MockServer{