From da9c91fd3df6d88f778c84e1bffa58b80750d7f6 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 28 Jun 2018 09:06:14 +0200
Subject: [PATCH 1/8] Only send one single ACL cache refresh across network
when TTL is over
It will allow the following:
* when connectivity is limited (saturated linnks between DCs), only one
single request to refresh an ACL will be sent to ACL master DC instead
of statcking ACL refresh queries
* when extend-cache is used for ACL, do not wait for result, but refresh
the ACL asynchronously, so no delay is not impacting slave DC
* When extend-cache is not used, keep the existing blocking mechanism,
but only send a single refresh request.
This will fix https://github.com/hashicorp/consul/issues/3524
---
agent/consul/acl.go | 79 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 72 insertions(+), 7 deletions(-)
diff --git a/agent/consul/acl.go b/agent/consul/acl.go
index ce3282b40..0bf1dbb4c 100644
--- a/agent/consul/acl.go
+++ b/agent/consul/acl.go
@@ -4,6 +4,7 @@ import (
"fmt"
"log"
"os"
+ "sync"
"time"
"github.com/armon/go-metrics"
@@ -116,6 +117,9 @@ type aclCache struct {
// local is a function used to look for an ACL locally if replication is
// enabled. This will be nil if replication isn't enabled.
local acl.FaultFunc
+
+ fetchMutex sync.RWMutex
+ fetchMap map[string][]chan (RemoteACLResult)
}
// newACLCache returns a new non-authoritative cache for ACLs. This is used for
@@ -146,6 +150,11 @@ func newACLCache(conf *Config, logger *log.Logger, rpc rpcFn, local acl.FaultFun
return cache, nil
}
+type RemoteACLResult struct {
+ result acl.ACL
+ err error
+}
+
// lookupACL is used when we are non-authoritative, and need to resolve an ACL.
func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
// Check the cache for the ACL.
@@ -161,8 +170,22 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
return cached.ACL, nil
}
metrics.IncrCounter([]string{"acl", "cache_miss"}, 1)
+ res := c.lookupACLRemote(id, authDC, cached)
+ return res.result, res.err
+}
- // Attempt to refresh the policy from the ACL datacenter via an RPC.
+func (c *aclCache) fireResult(id string, theACL acl.ACL, err error) {
+ c.fetchMutex.Lock()
+ channels := c.fetchMap[id]
+ delete(c.fetchMap, id)
+ c.fetchMutex.Unlock()
+ for _, cx := range channels {
+ cx <- RemoteACLResult{theACL, err}
+ close(cx)
+ }
+}
+
+func (c *aclCache) loadACLInChan(id, authDC string, cached *aclCacheEntry) {
args := structs.ACLPolicyRequest{
Datacenter: authDC,
ACL: id,
@@ -173,13 +196,21 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
var reply structs.ACLPolicy
err := c.rpc("ACL.GetPolicy", &args, &reply)
if err == nil {
- return c.useACLPolicy(id, authDC, cached, &reply)
+ theACL, theError := c.useACLPolicy(id, authDC, cached, &reply)
+ if cached != nil && theACL != nil {
+ cached.ACL = theACL
+ cached.ETag = reply.ETag
+ cached.Expires = time.Now().Add(c.config.ACLTTL)
+ }
+ c.fireResult(id, theACL, theError)
+ return
}
// Check for not-found, which will cause us to bail immediately. For any
// other error we report it in the logs but can continue.
if acl.IsErrNotFound(err) {
- return nil, acl.ErrNotFound
+ c.fireResult(id, nil, acl.ErrNotFound)
+ return
}
c.logger.Printf("[ERR] consul.acl: Failed to get policy from ACL datacenter: %v", err)
@@ -227,24 +258,58 @@ func (c *aclCache) lookupACL(id, authDC string) (acl.ACL, error) {
reply.TTL = c.config.ACLTTL
reply.Parent = parent
reply.Policy = policy
- return c.useACLPolicy(id, authDC, cached, &reply)
+ theACL, theError := c.useACLPolicy(id, authDC, cached, &reply)
+ if cached != nil && theACL != nil {
+ cached.ACL = theACL
+ cached.ETag = reply.ETag
+ cached.Expires = time.Now().Add(c.config.ACLTTL)
+ }
+ c.fireResult(id, theACL, theError)
+ return
}
ACL_DOWN:
// Unable to refresh, apply the down policy.
switch c.config.ACLDownPolicy {
case "allow":
- return acl.AllowAll(), nil
+ c.fireResult(id, acl.AllowAll(), nil)
+ return
case "extend-cache":
if cached != nil {
- return cached.ACL, nil
+ c.fireResult(id, cached.ACL, nil)
+ return
}
fallthrough
default:
- return acl.DenyAll(), nil
+ c.fireResult(id, acl.DenyAll(), nil)
+ return
}
}
+func (c *aclCache) lookupACLRemote(id, authDC string, cached *aclCacheEntry) RemoteACLResult {
+ // Attempt to refresh the policy from the ACL datacenter via an RPC.
+ myChan := make(chan RemoteACLResult)
+ mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "extend-cache"
+ c.fetchMutex.Lock()
+ clients, ok := c.fetchMap[id]
+ if !ok {
+ clients = make([]chan RemoteACLResult, 16)
+ }
+ if mustWaitForResult {
+ c.fetchMap[id] = append(clients, myChan)
+ }
+ c.fetchMutex.Unlock()
+
+ if !ok {
+ go c.loadACLInChan(id, authDC, cached)
+ }
+ if !mustWaitForResult {
+ return RemoteACLResult{cached.ACL, nil}
+ }
+ res := <-myChan
+ return res
+}
+
// useACLPolicy handles an ACLPolicy response
func (c *aclCache) useACLPolicy(id, authDC string, cached *aclCacheEntry, p *structs.ACLPolicy) (acl.ACL, error) {
// Check if we can used the cached policy
From 8ea69290ac9e02dc1c6a679817b4cd606f186789 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Fri, 29 Jun 2018 10:59:36 +0200
Subject: [PATCH 2/8] Updated ACL guide
---
website/source/docs/guides/acl.html.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md
index a78f60b55..f6aad52bc 100644
--- a/website/source/docs/guides/acl.html.md
+++ b/website/source/docs/guides/acl.html.md
@@ -1061,6 +1061,10 @@ and the [`acl_down_policy`](/docs/agent/options.html#acl_down_policy)
is set to "extend-cache", tokens will be resolved during the outage using the
replicated set of ACLs. An [ACL replication status](/api/acl.html#acl_replication_status)
endpoint is available to monitor the health of the replication process.
+Also note that in recent versions of Consul (greater than 1.2.0), using
+`acl_down_policy = "extend-cache"` refreshes token asynchronously when an ACL is
+already cached and is expired. It allows to avoid having issues when connectivity with
+the authoritative is not completely broken, but very slow.
Locally-resolved ACLs will be cached using the [`acl_ttl`](/docs/agent/options.html#acl_ttl)
setting of the non-authoritative datacenter, so these entries may persist in the
From 382bec0897c615712da996c448b5759e8c2ee942 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Sun, 1 Jul 2018 12:50:53 +0200
Subject: [PATCH 3/8] Added async-cache with similar behaviour as extend-cache
but asynchronously
---
agent/acl.go | 1 +
agent/acl_test.go | 133 ++++++++++++++++++++-------------------
agent/config/runtime.go | 4 +-
agent/consul/acl.go | 14 +++--
agent/consul/acl_test.go | 132 +++++++++++++++++++-------------------
agent/consul/config.go | 6 +-
6 files changed, 153 insertions(+), 137 deletions(-)
diff --git a/agent/acl.go b/agent/acl.go
index e266feafa..1143da97c 100644
--- a/agent/acl.go
+++ b/agent/acl.go
@@ -104,6 +104,7 @@ func newACLManager(config *config.RuntimeConfig) (*aclManager, error) {
case "deny":
down = acl.DenyAll()
case "extend-cache":
+ case "async-cache":
// Leave the down policy as nil to signal this.
default:
return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy)
diff --git a/agent/acl_test.go b/agent/acl_test.go
index 8b1626998..0932dc525 100644
--- a/agent/acl_test.go
+++ b/agent/acl_test.go
@@ -274,79 +274,82 @@ func TestACL_Down_Allow(t *testing.T) {
func TestACL_Down_Extend(t *testing.T) {
t.Parallel()
- a := NewTestAgent(t.Name(), TestACLConfig()+`
- acl_down_policy = "extend-cache"
+ aclExtendPolicies := []string{"extend-cache", "async-cache"}
+ for _, aclDownPolicy := range aclExtendPolicies {
+ a := NewTestAgent(t.Name(), TestACLConfig()+`
+ acl_down_policy = "`+aclDownPolicy+`"
acl_enforce_version_8 = true
`)
- defer a.Shutdown()
+ defer a.Shutdown()
- m := MockServer{
- // Populate the cache for one of the tokens.
- getPolicyFn: func(req *structs.ACLPolicyRequest, reply *structs.ACLPolicy) error {
- *reply = structs.ACLPolicy{
- Parent: "allow",
- Policy: &rawacl.Policy{
- Agents: []*rawacl.AgentPolicy{
- &rawacl.AgentPolicy{
- Node: a.config.NodeName,
- Policy: "read",
+ m := MockServer{
+ // Populate the cache for one of the tokens.
+ getPolicyFn: func(req *structs.ACLPolicyRequest, reply *structs.ACLPolicy) error {
+ *reply = structs.ACLPolicy{
+ Parent: "allow",
+ Policy: &rawacl.Policy{
+ Agents: []*rawacl.AgentPolicy{
+ &rawacl.AgentPolicy{
+ Node: a.config.NodeName,
+ Policy: "read",
+ },
},
},
- },
- }
- return nil
- },
- }
- if err := a.registerEndpoint("ACL", &m); err != nil {
- t.Fatalf("err: %v", err)
- }
+ }
+ return nil
+ },
+ }
+ if err := a.registerEndpoint("ACL", &m); err != nil {
+ t.Fatalf("err: %v", err)
+ }
- acl, err := a.resolveToken("yep")
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if acl == nil {
- t.Fatalf("should not be nil")
- }
- if !acl.AgentRead(a.config.NodeName) {
- t.Fatalf("should allow")
- }
- if acl.AgentWrite(a.config.NodeName) {
- t.Fatalf("should deny")
- }
+ acl, err := a.resolveToken("yep")
+ if err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if acl == nil {
+ t.Fatalf("should not be nil")
+ }
+ if !acl.AgentRead(a.config.NodeName) {
+ t.Fatalf("should allow")
+ }
+ if acl.AgentWrite(a.config.NodeName) {
+ t.Fatalf("should deny")
+ }
- // Now take down ACLs and make sure a new token fails to resolve.
- m.getPolicyFn = func(*structs.ACLPolicyRequest, *structs.ACLPolicy) error {
- return fmt.Errorf("ACLs are broken")
- }
- acl, err = a.resolveToken("nope")
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if acl == nil {
- t.Fatalf("should not be nil")
- }
- if acl.AgentRead(a.config.NodeName) {
- t.Fatalf("should deny")
- }
- if acl.AgentWrite(a.config.NodeName) {
- t.Fatalf("should deny")
- }
+ // Now take down ACLs and make sure a new token fails to resolve.
+ m.getPolicyFn = func(*structs.ACLPolicyRequest, *structs.ACLPolicy) error {
+ return fmt.Errorf("ACLs are broken")
+ }
+ acl, err = a.resolveToken("nope")
+ if err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if acl == nil {
+ t.Fatalf("should not be nil")
+ }
+ if acl.AgentRead(a.config.NodeName) {
+ t.Fatalf("should deny")
+ }
+ if acl.AgentWrite(a.config.NodeName) {
+ t.Fatalf("should deny")
+ }
- // Read the token from the cache while ACLs are broken, which should
- // extend.
- acl, err = a.resolveToken("yep")
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if acl == nil {
- t.Fatalf("should not be nil")
- }
- if !acl.AgentRead(a.config.NodeName) {
- t.Fatalf("should allow")
- }
- if acl.AgentWrite(a.config.NodeName) {
- t.Fatalf("should deny")
+ // Read the token from the cache while ACLs are broken, which should
+ // extend.
+ acl, err = a.resolveToken("yep")
+ if err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if acl == nil {
+ t.Fatalf("should not be nil")
+ }
+ if !acl.AgentRead(a.config.NodeName) {
+ t.Fatalf("should allow")
+ }
+ if acl.AgentWrite(a.config.NodeName) {
+ t.Fatalf("should deny")
+ }
}
}
diff --git a/agent/config/runtime.go b/agent/config/runtime.go
index 9674d14d5..05ad8796c 100644
--- a/agent/config/runtime.go
+++ b/agent/config/runtime.go
@@ -94,8 +94,10 @@ type RuntimeConfig struct {
// ACL's to be used to service requests. This
// is the default. If the ACL is not in the cache,
// this acts like deny.
+ // * async-cache - Same behaviour as extend-cache, but perform ACL
+ // Lookups asynchronously when cache TTL is expired.
//
- // hcl: acl_down_policy = ("allow"|"deny"|"extend-cache")
+ // hcl: acl_down_policy = ("allow"|"deny"|"extend-cache"|"async-cache")
ACLDownPolicy string
// ACLEnforceVersion8 is used to gate a set of ACL policy features that
diff --git a/agent/consul/acl.go b/agent/consul/acl.go
index 0bf1dbb4c..23bdbff35 100644
--- a/agent/consul/acl.go
+++ b/agent/consul/acl.go
@@ -146,10 +146,12 @@ func newACLCache(conf *Config, logger *log.Logger, rpc rpcFn, local acl.FaultFun
if err != nil {
return nil, fmt.Errorf("Failed to create ACL policy cache: %v", err)
}
+ cache.fetchMap = make(map[string][]chan (RemoteACLResult))
return cache, nil
}
+// Result Type returned when fetching Remote ACLs asynchronously
type RemoteACLResult struct {
result acl.ACL
err error
@@ -179,8 +181,9 @@ func (c *aclCache) fireResult(id string, theACL acl.ACL, err error) {
channels := c.fetchMap[id]
delete(c.fetchMap, id)
c.fetchMutex.Unlock()
+ aclResult := RemoteACLResult{theACL, err}
for _, cx := range channels {
- cx <- RemoteACLResult{theACL, err}
+ cx <- aclResult
close(cx)
}
}
@@ -231,7 +234,7 @@ func (c *aclCache) loadACLInChan(id, authDC string, cached *aclCacheEntry) {
// local ACL fault function is registered to query replicated ACL data,
// and the user's policy allows it, we will try locally before we give
// up.
- if c.local != nil && c.config.ACLDownPolicy == "extend-cache" {
+ if c.local != nil && (c.config.ACLDownPolicy == "extend-cache" || c.config.ACLDownPolicy == "async-cache") {
parent, rules, err := c.local(id)
if err != nil {
// We don't make an exception here for ACLs that aren't
@@ -274,6 +277,7 @@ ACL_DOWN:
case "allow":
c.fireResult(id, acl.AllowAll(), nil)
return
+ case "async-cache":
case "extend-cache":
if cached != nil {
c.fireResult(id, cached.ACL, nil)
@@ -289,11 +293,11 @@ ACL_DOWN:
func (c *aclCache) lookupACLRemote(id, authDC string, cached *aclCacheEntry) RemoteACLResult {
// Attempt to refresh the policy from the ACL datacenter via an RPC.
myChan := make(chan RemoteACLResult)
- mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "extend-cache"
+ mustWaitForResult := cached == nil || c.config.ACLDownPolicy != "async-cache"
c.fetchMutex.Lock()
clients, ok := c.fetchMap[id]
- if !ok {
- clients = make([]chan RemoteACLResult, 16)
+ if !ok || clients == nil {
+ clients = make([]chan RemoteACLResult, 0)
}
if mustWaitForResult {
c.fetchMap[id] = append(clients, myChan)
diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go
index ace1284a8..fd08b54ab 100644
--- a/agent/consul/acl_test.go
+++ b/agent/consul/acl_test.go
@@ -508,78 +508,82 @@ func TestACL_DownPolicy_Allow(t *testing.T) {
func TestACL_DownPolicy_ExtendCache(t *testing.T) {
t.Parallel()
- dir1, s1 := testServerWithConfig(t, func(c *Config) {
- c.ACLDatacenter = "dc1"
- c.ACLTTL = 0
- c.ACLDownPolicy = "extend-cache"
- c.ACLMasterToken = "root"
- })
- defer os.RemoveAll(dir1)
- defer s1.Shutdown()
- client := rpcClient(t, s1)
- defer client.Close()
+ aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache"
- dir2, s2 := testServerWithConfig(t, func(c *Config) {
- c.ACLDatacenter = "dc1" // Enable ACLs!
- c.ACLTTL = 0
- c.ACLDownPolicy = "extend-cache"
- c.Bootstrap = false // Disable bootstrap
- })
- defer os.RemoveAll(dir2)
- defer s2.Shutdown()
+ for _, aclDownPolicy := range aclExtendPolicies {
+ dir1, s1 := testServerWithConfig(t, func(c *Config) {
+ c.ACLDatacenter = "dc1"
+ c.ACLTTL = 0
+ c.ACLDownPolicy = aclDownPolicy
+ c.ACLMasterToken = "root"
+ })
+ defer os.RemoveAll(dir1)
+ defer s1.Shutdown()
+ client := rpcClient(t, s1)
+ defer client.Close()
- // Try to join
- joinLAN(t, s2, s1)
- retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) })
+ dir2, s2 := testServerWithConfig(t, func(c *Config) {
+ c.ACLDatacenter = "dc1" // Enable ACLs!
+ c.ACLTTL = 0
+ c.ACLDownPolicy = aclDownPolicy
+ c.Bootstrap = false // Disable bootstrap
+ })
+ defer os.RemoveAll(dir2)
+ defer s2.Shutdown()
- testrpc.WaitForLeader(t, s1.RPC, "dc1")
+ // Try to join
+ joinLAN(t, s2, s1)
+ retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2})) })
- // Create a new token
- arg := structs.ACLRequest{
- Datacenter: "dc1",
- Op: structs.ACLSet,
- ACL: structs.ACL{
- Name: "User token",
- Type: structs.ACLTypeClient,
- Rules: testACLPolicy,
- },
- WriteRequest: structs.WriteRequest{Token: "root"},
- }
- var id string
- if err := s1.RPC("ACL.Apply", &arg, &id); err != nil {
- t.Fatalf("err: %v", err)
- }
+ testrpc.WaitForLeader(t, s1.RPC, "dc1")
- // find the non-authoritative server
- var nonAuth *Server
- var auth *Server
- if !s1.IsLeader() {
- nonAuth = s1
- auth = s2
- } else {
- nonAuth = s2
- auth = s1
- }
+ // Create a new token
+ arg := structs.ACLRequest{
+ Datacenter: "dc1",
+ Op: structs.ACLSet,
+ ACL: structs.ACL{
+ Name: "User token",
+ Type: structs.ACLTypeClient,
+ Rules: testACLPolicy,
+ },
+ WriteRequest: structs.WriteRequest{Token: "root"},
+ }
+ var id string
+ if err := s1.RPC("ACL.Apply", &arg, &id); err != nil {
+ t.Fatalf("err: %v", err)
+ }
- // Warm the caches
- aclR, err := nonAuth.resolveToken(id)
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if aclR == nil {
- t.Fatalf("bad acl: %#v", aclR)
- }
+ // find the non-authoritative server
+ var nonAuth *Server
+ var auth *Server
+ if !s1.IsLeader() {
+ nonAuth = s1
+ auth = s2
+ } else {
+ nonAuth = s2
+ auth = s1
+ }
- // Kill the authoritative server
- auth.Shutdown()
+ // Warm the caches
+ aclR, err := nonAuth.resolveToken(id)
+ if err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if aclR == nil {
+ t.Fatalf("bad acl: %#v", aclR)
+ }
- // Token should resolve into cached copy
- aclR2, err := nonAuth.resolveToken(id)
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if aclR2 != aclR {
- t.Fatalf("bad acl: %#v", aclR)
+ // Kill the authoritative server
+ auth.Shutdown()
+
+ // Token should resolve into cached copy
+ aclR2, err := nonAuth.resolveToken(id)
+ if err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if aclR2 != aclR {
+ t.Fatalf("bad acl: %#v", aclR)
+ }
}
}
diff --git a/agent/consul/config.go b/agent/consul/config.go
index 29a524531..570a07c16 100644
--- a/agent/consul/config.go
+++ b/agent/consul/config.go
@@ -235,8 +235,9 @@ type Config struct {
// ACLDownPolicy controls the behavior of ACLs if the ACLDatacenter
// cannot be contacted. It can be either "deny" to deny all requests,
- // or "extend-cache" which ignores the ACLCacheInterval and uses
- // cached policies. If a policy is not in the cache, it acts like deny.
+ // "extend-cache" or "async-cache" which ignores the ACLCacheInterval and
+ // uses cached policies.
+ // If a policy is not in the cache, it acts like deny.
// "allow" can be used to allow all requests. This is not recommended.
ACLDownPolicy string
@@ -378,6 +379,7 @@ func (c *Config) CheckACL() error {
switch c.ACLDownPolicy {
case "allow":
case "deny":
+ case "async-cache":
case "extend-cache":
default:
return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy)
From 6dfbbf135006313d6986ac71edbb110da6edca82 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Sun, 1 Jul 2018 20:00:20 +0200
Subject: [PATCH 4/8] Updated documentation and adding more test case for
async-cache
---
agent/consul/acl_test.go | 198 +++++++++++-----------
website/source/docs/agent/options.html.md | 6 +-
website/source/docs/guides/acl.html.md | 7 +-
3 files changed, 109 insertions(+), 102 deletions(-)
diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go
index fd08b54ab..45048f80a 100644
--- a/agent/consul/acl_test.go
+++ b/agent/consul/acl_test.go
@@ -589,116 +589,120 @@ func TestACL_DownPolicy_ExtendCache(t *testing.T) {
func TestACL_Replication(t *testing.T) {
t.Parallel()
- dir1, s1 := testServerWithConfig(t, func(c *Config) {
- c.ACLDatacenter = "dc1"
- c.ACLMasterToken = "root"
- })
- defer os.RemoveAll(dir1)
- defer s1.Shutdown()
- client := rpcClient(t, s1)
- defer client.Close()
+ aclExtendPolicies := []string{"extend-cache", "async-cache"} //"async-cache"
- dir2, s2 := testServerWithConfig(t, func(c *Config) {
- c.Datacenter = "dc2"
- c.ACLDatacenter = "dc1"
- c.ACLDefaultPolicy = "deny"
- c.ACLDownPolicy = "extend-cache"
- c.EnableACLReplication = true
- c.ACLReplicationInterval = 10 * time.Millisecond
- c.ACLReplicationApplyLimit = 1000000
- })
- s2.tokens.UpdateACLReplicationToken("root")
- defer os.RemoveAll(dir2)
- defer s2.Shutdown()
+ for _, aclDownPolicy := range aclExtendPolicies {
+ dir1, s1 := testServerWithConfig(t, func(c *Config) {
+ c.ACLDatacenter = "dc1"
+ c.ACLMasterToken = "root"
+ })
+ defer os.RemoveAll(dir1)
+ defer s1.Shutdown()
+ client := rpcClient(t, s1)
+ defer client.Close()
- dir3, s3 := testServerWithConfig(t, func(c *Config) {
- c.Datacenter = "dc3"
- c.ACLDatacenter = "dc1"
- c.ACLDownPolicy = "deny"
- c.EnableACLReplication = true
- c.ACLReplicationInterval = 10 * time.Millisecond
- c.ACLReplicationApplyLimit = 1000000
- })
- s3.tokens.UpdateACLReplicationToken("root")
- defer os.RemoveAll(dir3)
- defer s3.Shutdown()
+ dir2, s2 := testServerWithConfig(t, func(c *Config) {
+ c.Datacenter = "dc2"
+ c.ACLDatacenter = "dc1"
+ c.ACLDefaultPolicy = "deny"
+ c.ACLDownPolicy = aclDownPolicy
+ c.EnableACLReplication = true
+ c.ACLReplicationInterval = 10 * time.Millisecond
+ c.ACLReplicationApplyLimit = 1000000
+ })
+ s2.tokens.UpdateACLReplicationToken("root")
+ defer os.RemoveAll(dir2)
+ defer s2.Shutdown()
- // Try to join.
- joinWAN(t, s2, s1)
- joinWAN(t, s3, s1)
- testrpc.WaitForLeader(t, s1.RPC, "dc1")
- testrpc.WaitForLeader(t, s1.RPC, "dc2")
- testrpc.WaitForLeader(t, s1.RPC, "dc3")
+ dir3, s3 := testServerWithConfig(t, func(c *Config) {
+ c.Datacenter = "dc3"
+ c.ACLDatacenter = "dc1"
+ c.ACLDownPolicy = "deny"
+ c.EnableACLReplication = true
+ c.ACLReplicationInterval = 10 * time.Millisecond
+ c.ACLReplicationApplyLimit = 1000000
+ })
+ s3.tokens.UpdateACLReplicationToken("root")
+ defer os.RemoveAll(dir3)
+ defer s3.Shutdown()
- // Create a new token.
- arg := structs.ACLRequest{
- Datacenter: "dc1",
- Op: structs.ACLSet,
- ACL: structs.ACL{
- Name: "User token",
- Type: structs.ACLTypeClient,
- Rules: testACLPolicy,
- },
- WriteRequest: structs.WriteRequest{Token: "root"},
- }
- var id string
- if err := s1.RPC("ACL.Apply", &arg, &id); err != nil {
- t.Fatalf("err: %v", err)
- }
- // Wait for replication to occur.
- retry.Run(t, func(r *retry.R) {
- _, acl, err := s2.fsm.State().ACLGet(nil, id)
+ // Try to join.
+ joinWAN(t, s2, s1)
+ joinWAN(t, s3, s1)
+ testrpc.WaitForLeader(t, s1.RPC, "dc1")
+ testrpc.WaitForLeader(t, s1.RPC, "dc2")
+ testrpc.WaitForLeader(t, s1.RPC, "dc3")
+
+ // Create a new token.
+ arg := structs.ACLRequest{
+ Datacenter: "dc1",
+ Op: structs.ACLSet,
+ ACL: structs.ACL{
+ Name: "User token",
+ Type: structs.ACLTypeClient,
+ Rules: testACLPolicy,
+ },
+ WriteRequest: structs.WriteRequest{Token: "root"},
+ }
+ var id string
+ if err := s1.RPC("ACL.Apply", &arg, &id); err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ // Wait for replication to occur.
+ retry.Run(t, func(r *retry.R) {
+ _, acl, err := s2.fsm.State().ACLGet(nil, id)
+ if err != nil {
+ r.Fatal(err)
+ }
+ if acl == nil {
+ r.Fatal(nil)
+ }
+ _, acl, err = s3.fsm.State().ACLGet(nil, id)
+ if err != nil {
+ r.Fatal(err)
+ }
+ if acl == nil {
+ r.Fatal(nil)
+ }
+ })
+
+ // Kill the ACL datacenter.
+ s1.Shutdown()
+
+ // Token should resolve on s2, which has replication + extend-cache.
+ acl, err := s2.resolveToken(id)
if err != nil {
- r.Fatal(err)
+ t.Fatalf("err: %v", err)
}
if acl == nil {
- r.Fatal(nil)
+ t.Fatalf("missing acl")
}
- _, acl, err = s3.fsm.State().ACLGet(nil, id)
+
+ // Check the policy
+ if acl.KeyRead("bar") {
+ t.Fatalf("unexpected read")
+ }
+ if !acl.KeyRead("foo/test") {
+ t.Fatalf("unexpected failed read")
+ }
+
+ // Although s3 has replication, and we verified that the ACL is there,
+ // it can not be used because of the down policy.
+ acl, err = s3.resolveToken(id)
if err != nil {
- r.Fatal(err)
+ t.Fatalf("err: %v", err)
}
if acl == nil {
- r.Fatal(nil)
+ t.Fatalf("missing acl")
}
- })
- // Kill the ACL datacenter.
- s1.Shutdown()
-
- // Token should resolve on s2, which has replication + extend-cache.
- acl, err := s2.resolveToken(id)
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if acl == nil {
- t.Fatalf("missing acl")
- }
-
- // Check the policy
- if acl.KeyRead("bar") {
- t.Fatalf("unexpected read")
- }
- if !acl.KeyRead("foo/test") {
- t.Fatalf("unexpected failed read")
- }
-
- // Although s3 has replication, and we verified that the ACL is there,
- // it can not be used because of the down policy.
- acl, err = s3.resolveToken(id)
- if err != nil {
- t.Fatalf("err: %v", err)
- }
- if acl == nil {
- t.Fatalf("missing acl")
- }
-
- // Check the policy.
- if acl.KeyRead("bar") {
- t.Fatalf("unexpected read")
- }
- if acl.KeyRead("foo/test") {
- t.Fatalf("unexpected read")
+ // Check the policy.
+ if acl.KeyRead("bar") {
+ t.Fatalf("unexpected read")
+ }
+ if acl.KeyRead("foo/test") {
+ t.Fatalf("unexpected read")
+ }
}
}
diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md
index 4e2ecb303..65402c2ca 100644
--- a/website/source/docs/agent/options.html.md
+++ b/website/source/docs/agent/options.html.md
@@ -496,11 +496,13 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
to enable ACL support.
* `acl_down_policy` - Either
- "allow", "deny" or "extend-cache"; "extend-cache" is the default. In the case that the
+ "allow", "deny", "extend-cache" or "async-cache"; "extend-cache" is the default. In the case that the
policy for a token cannot be read from the [`acl_datacenter`](#acl_datacenter) or leader
node, the down policy is applied. In "allow" mode, all actions are permitted, "deny" restricts
all operations, and "extend-cache" allows any cached ACLs to be used, ignoring their TTL
- values. If a non-cached ACL is used, "extend-cache" acts like "deny".
+ values. If a non-cached ACL is used, "extend-cache" acts like "deny". "async-cache" acts the same
+ way as "extend-cache" but performs updates asynchronously when ACL is present but its TTL is
+ expired.
* `acl_agent_master_token` -
Used to access agent endpoints that require agent read
diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md
index f6aad52bc..c25666ab9 100644
--- a/website/source/docs/guides/acl.html.md
+++ b/website/source/docs/guides/acl.html.md
@@ -1062,9 +1062,10 @@ is set to "extend-cache", tokens will be resolved during the outage using the
replicated set of ACLs. An [ACL replication status](/api/acl.html#acl_replication_status)
endpoint is available to monitor the health of the replication process.
Also note that in recent versions of Consul (greater than 1.2.0), using
-`acl_down_policy = "extend-cache"` refreshes token asynchronously when an ACL is
-already cached and is expired. It allows to avoid having issues when connectivity with
-the authoritative is not completely broken, but very slow.
+`acl_down_policy = "async-cache"` refreshes token asynchronously when an ACL is
+already cached and is expired while similar semantics than "extend-cache".
+It allows to avoid having issues when connectivity with the authoritative is not completely
+broken, but very slow.
Locally-resolved ACLs will be cached using the [`acl_ttl`](/docs/agent/options.html#acl_ttl)
setting of the non-authoritative datacenter, so these entries may persist in the
From d8a6571683de666c094dc563dc708d5bfc5a19af Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Sun, 1 Jul 2018 23:37:40 +0200
Subject: [PATCH 5/8] Improve doc for async-cache
---
website/source/docs/agent/options.html.md | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md
index 65402c2ca..06f071dd0 100644
--- a/website/source/docs/agent/options.html.md
+++ b/website/source/docs/agent/options.html.md
@@ -500,9 +500,10 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
policy for a token cannot be read from the [`acl_datacenter`](#acl_datacenter) or leader
node, the down policy is applied. In "allow" mode, all actions are permitted, "deny" restricts
all operations, and "extend-cache" allows any cached ACLs to be used, ignoring their TTL
- values. If a non-cached ACL is used, "extend-cache" acts like "deny". "async-cache" acts the same
- way as "extend-cache" but performs updates asynchronously when ACL is present but its TTL is
- expired.
+ values. If a non-cached ACL is used, "extend-cache" acts like "deny".
+ The value "async-cache" acts the same way as "extend-cache" but performs updates
+ asynchronously when ACL is present but its TTL is expired, thus, if latency is bad between
+ ACL authoritative and other datacenters, latency of operations is not impacted.
* `acl_agent_master_token` -
Used to access agent endpoints that require agent read
From f675575b66e68befcc77d32327f5fa110523af06 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Mon, 2 Jul 2018 00:24:37 +0200
Subject: [PATCH 6/8] Trying to fix Travis tests
---
GNUmakefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/GNUmakefile b/GNUmakefile
index 5df83416b..d510c0fe6 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -158,7 +158,7 @@ test: other-consul dev-build vet
@# hide it from travis as it exceeds their log limits and causes job to be
@# terminated (over 4MB and over 10k lines in the UI). We need to output
@# _something_ to stop them terminating us due to inactivity...
- { go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 5m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul'
+ { go test $(GOTEST_FLAGS) -tags '$(GOTAGS)' -timeout 7m $(GOTEST_PKGS) 2>&1 ; echo $$? > exit-code ; } | tee test.log | egrep '^(ok|FAIL)\s*github.com/hashicorp/consul'
@echo "Exit code: $$(cat exit-code)" >> test.log
@# This prints all the race report between ====== lines
@awk '/^WARNING: DATA RACE/ {do_print=1; print "=================="} do_print==1 {print} /^={10,}/ {do_print=0}' test.log || true
From 95a0ab9f999b45f86ce347a4d830e43bfee68a62 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Mon, 2 Jul 2018 17:39:34 +0200
Subject: [PATCH 7/8] Updated swith case to use same branch for async-cache and
extend-cache
---
agent/acl.go | 3 +--
agent/consul/acl.go | 3 +--
agent/consul/config.go | 3 +--
3 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/agent/acl.go b/agent/acl.go
index 1143da97c..5c4deb7cc 100644
--- a/agent/acl.go
+++ b/agent/acl.go
@@ -103,8 +103,7 @@ func newACLManager(config *config.RuntimeConfig) (*aclManager, error) {
down = acl.AllowAll()
case "deny":
down = acl.DenyAll()
- case "extend-cache":
- case "async-cache":
+ case "async-cache", "extend-cache":
// Leave the down policy as nil to signal this.
default:
return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy)
diff --git a/agent/consul/acl.go b/agent/consul/acl.go
index 23bdbff35..4dbfe2b39 100644
--- a/agent/consul/acl.go
+++ b/agent/consul/acl.go
@@ -277,8 +277,7 @@ ACL_DOWN:
case "allow":
c.fireResult(id, acl.AllowAll(), nil)
return
- case "async-cache":
- case "extend-cache":
+ case "async-cache", "extend-cache":
if cached != nil {
c.fireResult(id, cached.ACL, nil)
return
diff --git a/agent/consul/config.go b/agent/consul/config.go
index 570a07c16..b878d9a99 100644
--- a/agent/consul/config.go
+++ b/agent/consul/config.go
@@ -379,8 +379,7 @@ func (c *Config) CheckACL() error {
switch c.ACLDownPolicy {
case "allow":
case "deny":
- case "async-cache":
- case "extend-cache":
+ case "async-cache", "extend-cache":
default:
return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy)
}
From 135ac85b214f97048d82d5a9f22c0dc8324b8f6d Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Sat, 7 Jul 2018 14:03:34 +0200
Subject: [PATCH 8/8] Fixed indentation in test
---
agent/acl_test.go | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/agent/acl_test.go b/agent/acl_test.go
index 0932dc525..6b912646e 100644
--- a/agent/acl_test.go
+++ b/agent/acl_test.go
@@ -277,9 +277,9 @@ func TestACL_Down_Extend(t *testing.T) {
aclExtendPolicies := []string{"extend-cache", "async-cache"}
for _, aclDownPolicy := range aclExtendPolicies {
a := NewTestAgent(t.Name(), TestACLConfig()+`
- acl_down_policy = "`+aclDownPolicy+`"
- acl_enforce_version_8 = true
- `)
+ acl_down_policy = "`+aclDownPolicy+`"
+ acl_enforce_version_8 = true
+ `)
defer a.Shutdown()
m := MockServer{