From 8572931afe52a04b9e3c43dfc0efe042771f0291 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Thu, 13 Jul 2017 22:33:47 -0700 Subject: [PATCH] Cleans up version 8 ACLs in the agent and the docs. (#3248) * Moves magic check and service constants into shared structs package. * Removes the "consul" service from local state. Since this service is added by the leader, it doesn't really make sense to also keep it in local state (which requires special ACLs to configure), and requires a bunch of special cases in the local state logic. This requires fewer special cases and makes ACL bootstrapping cleaner. * Makes coordinate update ACL log message a warning, similar to other AE warnings. * Adds much more detailed examples for bootstrapping ACLs. This can hopefully replace https://gist.github.com/slackpad/d89ce0e1cc0802c3c4f2d84932fa3234. --- agent/agent.go | 32 +-- agent/agent_endpoint_test.go | 10 +- agent/agent_test.go | 48 +---- agent/consul/acl.go | 2 +- agent/consul/catalog_endpoint.go | 2 +- agent/consul/health_endpoint_test.go | 2 +- agent/consul/leader.go | 34 ++- agent/consul/leader_test.go | 16 +- agent/consul/structs/catalog.go | 21 ++ agent/local.go | 10 +- agent/local_test.go | 32 +-- agent/session_endpoint.go | 3 +- agent/session_endpoint_test.go | 5 +- website/source/docs/guides/acl.html.md | 278 ++++++++++++++++++++++++- 14 files changed, 359 insertions(+), 136 deletions(-) create mode 100644 agent/consul/structs/catalog.go diff --git a/agent/agent.go b/agent/agent.go index 6001a4ee3..e81b91fc3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -309,16 +309,6 @@ func (a *Agent) Start() error { a.delegate = server a.state.delegate = server - - // Automatically register the "consul" service on server nodes - consulService := structs.NodeService{ - Service: consul.ConsulServiceName, - ID: consul.ConsulServiceID, - Port: c.Ports.Server, - Tags: []string{}, - } - - a.state.AddService(&consulService, c.GetTokenForAgent()) } else { client, err := consul.NewClientLogger(consulCfg, a.logger) if err != nil { @@ -1309,17 +1299,17 @@ func (a *Agent) sendCoordinate() { members := a.LANMembers() grok, err := consul.CanServersUnderstandProtocol(members, 3) if err != nil { - a.logger.Printf("[ERR] agent: failed to check servers: %s", err) + a.logger.Printf("[ERR] agent: Failed to check servers: %s", err) continue } if !grok { - a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded") + a.logger.Printf("[DEBUG] agent: Skipping coordinate updates until servers are upgraded") continue } c, err := a.GetLANCoordinate() if err != nil { - a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err) + a.logger.Printf("[ERR] agent: Failed to get coordinate: %s", err) continue } @@ -1331,7 +1321,11 @@ func (a *Agent) sendCoordinate() { } var reply struct{} if err := a.RPC("Coordinate.Update", &req, &reply); err != nil { - a.logger.Printf("[ERR] agent: coordinate update error: %s", err) + if strings.Contains(err.Error(), permissionDenied) { + a.logger.Printf("[WARN] agent: Coordinate update blocked by ACLs") + } else { + a.logger.Printf("[ERR] agent: Coordinate update error: %v", err) + } continue } case <-a.shutdownCh: @@ -1561,13 +1555,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che // RemoveService is used to remove a service entry. 
// The agent will make a best effort to ensure it is deregistered func (a *Agent) RemoveService(serviceID string, persist bool) error { - // Protect "consul" service from deletion by a user - if _, ok := a.delegate.(*consul.Server); ok && serviceID == consul.ConsulServiceID { - return fmt.Errorf( - "Deregistering the %s service is not allowed", - consul.ConsulServiceID) - } - // Validate ServiceID if serviceID == "" { return fmt.Errorf("ServiceID missing") @@ -2069,9 +2056,6 @@ func (a *Agent) loadServices(conf *Config) error { // known to the local agent. func (a *Agent) unloadServices() error { for _, service := range a.state.Services() { - if service.ID == consul.ConsulServiceID { - continue - } if err := a.RemoveService(service.ID, false); err != nil { return fmt.Errorf("Failed deregistering service '%s': %v", service.ID, err) } diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 401248539..84b9d3a7d 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -57,7 +57,7 @@ func TestAgent_Services(t *testing.T) { t.Fatalf("Err: %v", err) } val := obj.(map[string]*structs.NodeService) - if len(val) != 2 { + if len(val) != 1 { t.Fatalf("bad services: %v", obj) } if val["mysql"].Port != 5000 { @@ -70,6 +70,14 @@ func TestAgent_Services_ACLFilter(t *testing.T) { a := NewTestAgent(t.Name(), TestACLConfig()) defer a.Shutdown() + srv1 := &structs.NodeService{ + ID: "mysql", + Service: "mysql", + Tags: []string{"master"}, + Port: 5000, + } + a.state.AddService(srv1, "") + t.Run("no token", func(t *testing.T) { req, _ := http.NewRequest("GET", "/v1/agent/services", nil) obj, err := a.srv.AgentServices(nil, req) diff --git a/agent/agent_test.go b/agent/agent_test.go index 4b9470a94..97f772cf4 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -492,11 +492,6 @@ func TestAgent_RemoveService(t *testing.T) { t.Fatalf("err: %v", err) } - // Remove the consul service - if err := a.RemoveService("consul", false); err == nil { - t.Fatalf("should have errored") - } - // Remove without an ID if err := a.RemoveService("", false); err == nil { t.Fatalf("should have errored") @@ -882,34 +877,6 @@ func TestAgent_updateTTLCheck(t *testing.T) { } } -func TestAgent_ConsulService(t *testing.T) { - t.Parallel() - a := NewTestAgent(t.Name(), nil) - defer a.Shutdown() - - // Consul service is registered - services := a.state.Services() - if _, ok := services[consul.ConsulServiceID]; !ok { - t.Fatalf("%s service should be registered", consul.ConsulServiceID) - } - - // todo(fs): data race - func() { - a.state.Lock() - defer a.state.Unlock() - - // Perform anti-entropy on consul service - if err := a.state.syncService(consul.ConsulServiceID); err != nil { - t.Fatalf("err: %s", err) - } - }() - - // Consul service should be in sync - if !a.state.serviceStatus[consul.ConsulServiceID].inSync { - t.Fatalf("%s service should be in sync", consul.ConsulServiceID) - } -} - func TestAgent_PersistService(t *testing.T) { t.Parallel() cfg := TestConfig() @@ -1432,19 +1399,8 @@ func TestAgent_unloadServices(t *testing.T) { if err := a.unloadServices(); err != nil { t.Fatalf("err: %s", err) } - - // Make sure it was unloaded and the consul service remains - found = false - for id := range a.state.Services() { - if id == svc.ID { - t.Fatalf("should have unloaded services") - } - if id == consul.ConsulServiceID { - found = true - } - } - if !found { - t.Fatalf("consul service should not be removed") + if len(a.state.Services()) != 0 { + t.Fatalf("should have unloaded 
services") } } diff --git a/agent/consul/acl.go b/agent/consul/acl.go index 7845ab53d..ead3a25be 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -341,7 +341,7 @@ func (f *aclFilter) allowService(service string) bool { return true } - if !f.enforceVersion8 && service == ConsulServiceID { + if !f.enforceVersion8 && service == structs.ConsulServiceID { return true } diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 1e75b16b6..db7b6c7bf 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -64,7 +64,7 @@ func (c *Catalog) Register(args *structs.RegisterRequest, reply *struct{}) error // is going away after version 0.8). We check this same policy // later if version 0.8 is enabled, so we can eventually just // delete this and do all the ACL checks down there. - if args.Service.Service != ConsulServiceName { + if args.Service.Service != structs.ConsulServiceName { if acl != nil && !acl.ServiceWrite(args.Service.Service) { return errPermissionDenied } diff --git a/agent/consul/health_endpoint_test.go b/agent/consul/health_endpoint_test.go index be64a0310..90bb49163 100644 --- a/agent/consul/health_endpoint_test.go +++ b/agent/consul/health_endpoint_test.go @@ -54,7 +54,7 @@ func TestHealth_ChecksInState(t *testing.T) { if checks[0].Name != "memory utilization" { t.Fatalf("Bad: %v", checks[0]) } - if checks[1].CheckID != SerfCheckID { + if checks[1].CheckID != structs.SerfCheckID { t.Fatalf("Bad: %v", checks[1]) } } diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 2f42c5293..ba77b4492 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -18,14 +18,8 @@ import ( ) const ( - SerfCheckID types.CheckID = "serfHealth" - SerfCheckName = "Serf Health Status" - SerfCheckAliveOutput = "Agent alive and reachable" - SerfCheckFailedOutput = "Agent not live or unreachable" - ConsulServiceID = "consul" - ConsulServiceName = "consul" - newLeaderEvent = "consul:new-leader" - barrierWriteTimeout = 2 * time.Minute + newLeaderEvent = "consul:new-leader" + barrierWriteTimeout = 2 * time.Minute ) // monitorLeadership is used to monitor if we acquire or lose our role @@ -334,7 +328,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error { } for _, check := range checks { // Ignore any non serf checks - if check.CheckID != SerfCheckID { + if check.CheckID != structs.SerfCheckID { continue } @@ -359,7 +353,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error { } serverPort := 0 for _, service := range services.Services { - if service.ID == ConsulServiceID { + if service.ID == structs.ConsulServiceID { serverPort = service.Port break } @@ -430,8 +424,8 @@ func (s *Server) handleAliveMember(member serf.Member) error { var service *structs.NodeService if valid, parts := agent.IsConsulServer(member); valid { service = &structs.NodeService{ - ID: ConsulServiceID, - Service: ConsulServiceName, + ID: structs.ConsulServiceID, + Service: structs.ConsulServiceName, Port: parts.Port, } @@ -473,7 +467,7 @@ func (s *Server) handleAliveMember(member serf.Member) error { return err } for _, check := range checks { - if check.CheckID == SerfCheckID && check.Status == api.HealthPassing { + if check.CheckID == structs.SerfCheckID && check.Status == api.HealthPassing { return nil } } @@ -490,10 +484,10 @@ AFTER_CHECK: Service: service, Check: &structs.HealthCheck{ Node: member.Name, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, 
Status: api.HealthPassing, - Output: SerfCheckAliveOutput, + Output: structs.SerfCheckAliveOutput, }, // If there's existing information about the node, do not @@ -520,7 +514,7 @@ func (s *Server) handleFailedMember(member serf.Member) error { return err } for _, check := range checks { - if check.CheckID == SerfCheckID && check.Status == api.HealthCritical { + if check.CheckID == structs.SerfCheckID && check.Status == api.HealthCritical { return nil } } @@ -535,10 +529,10 @@ func (s *Server) handleFailedMember(member serf.Member) error { Address: member.Addr.String(), Check: &structs.HealthCheck{ Node: member.Name, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, - Output: SerfCheckFailedOutput, + Output: structs.SerfCheckFailedOutput, }, // If there's existing information about the node, do not diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index 35a98d45b..052f86d9e 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -53,10 +53,10 @@ func TestLeader_RegisterMember(t *testing.T) { if len(checks) != 1 { t.Fatalf("client missing check") } - if checks[0].CheckID != SerfCheckID { + if checks[0].CheckID != structs.SerfCheckID { t.Fatalf("bad check: %v", checks[0]) } - if checks[0].Name != SerfCheckName { + if checks[0].Name != structs.SerfCheckName { t.Fatalf("bad check: %v", checks[0]) } if checks[0].Status != api.HealthPassing { @@ -125,10 +125,10 @@ func TestLeader_FailedMember(t *testing.T) { if len(checks) != 1 { t.Fatalf("client missing check") } - if checks[0].CheckID != SerfCheckID { + if checks[0].CheckID != structs.SerfCheckID { t.Fatalf("bad check: %v", checks[0]) } - if checks[0].Name != SerfCheckName { + if checks[0].Name != structs.SerfCheckName { t.Fatalf("bad check: %v", checks[0]) } @@ -270,8 +270,8 @@ func TestLeader_Reconcile_ReapMember(t *testing.T) { Address: "127.1.1.1", Check: &structs.HealthCheck{ Node: "no-longer-around", - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, }, WriteRequest: structs.WriteRequest{ @@ -378,8 +378,8 @@ func TestLeader_Reconcile_Races(t *testing.T) { NodeMeta: map[string]string{"hello": "world"}, Check: &structs.HealthCheck{ Node: c1.config.NodeName, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, Output: "", }, diff --git a/agent/consul/structs/catalog.go b/agent/consul/structs/catalog.go new file mode 100644 index 000000000..b6b443f6f --- /dev/null +++ b/agent/consul/structs/catalog.go @@ -0,0 +1,21 @@ +package structs + +import ( + "github.com/hashicorp/consul/types" +) + +// These are used to manage the built-in "serfHealth" check that's attached +// to every node in the catalog. +const ( + SerfCheckID types.CheckID = "serfHealth" + SerfCheckName = "Serf Health Status" + SerfCheckAliveOutput = "Agent alive and reachable" + SerfCheckFailedOutput = "Agent not live or unreachable" +) + +// These are used to manage the "consul" service that's attached to every Consul +// server node in the catalog. 
+const ( + ConsulServiceID = "consul" + ConsulServiceName = "consul" +) diff --git a/agent/local.go b/agent/local.go index f359f40da..d71ca2211 100644 --- a/agent/local.go +++ b/agent/local.go @@ -9,7 +9,6 @@ import ( "sync/atomic" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" @@ -483,6 +482,11 @@ func (l *localState) setSyncState() error { // If we don't have the service locally, deregister it existing, ok := l.services[id] if !ok { + // The consul service is created automatically, and does + // not need to be deregistered. + if id == structs.ConsulServiceID { + continue + } l.serviceStatus[id] = syncStatus{inSync: false} continue } @@ -517,8 +521,8 @@ func (l *localState) setSyncState() error { existing, ok := l.checks[id] if !ok { // The Serf check is created automatically, and does not - // need to be registered - if id == consul.SerfCheckID { + // need to be deregistered. + if id == structs.SerfCheckID { continue } l.checkStatus[id] = syncStatus{inSync: false} diff --git a/agent/local_test.go b/agent/local_test.go index 5f5e19bfd..de66480df 100644 --- a/agent/local_test.go +++ b/agent/local_test.go @@ -161,7 +161,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { if !reflect.DeepEqual(serv, srv6) { r.Fatalf("bad: %v %v", serv, srv6) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -173,10 +173,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 6 { + if len(a.state.services) != 5 { r.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 6 { + if len(a.state.serviceStatus) != 5 { r.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -222,7 +222,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { if !reflect.DeepEqual(serv, srv6) { r.Fatalf("bad: %v %v", serv, srv6) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -234,10 +234,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 5 { + if len(a.state.services) != 4 { r.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 5 { + if len(a.state.serviceStatus) != 4 { r.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -333,7 +333,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { !reflect.DeepEqual(serv.Tags, []string{"tag2"}) { r.Fatalf("bad: %v %v", serv, srv2) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -575,7 +575,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { if !reflect.DeepEqual(serv, srv2) { t.Fatalf("bad: %#v %#v", serv, srv2) } - case "consul": + case structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -588,10 +588,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 3 { + if len(a.state.services) != 2 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 3 { + if len(a.state.serviceStatus) != 2 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -634,7 +634,7 @@ func 
TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { t.Fatalf("should not be permitted") case "api": t.Fatalf("should be deleted") - case "consul": + case structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -647,10 +647,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 2 { + if len(a.state.services) != 1 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 2 { + if len(a.state.serviceStatus) != 1 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -975,7 +975,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { if !reflect.DeepEqual(serv, srv2) { t.Fatalf("bad: %#v %#v", serv, srv2) } - case "consul": + case structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -988,10 +988,10 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 3 { + if len(a.state.services) != 2 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 3 { + if len(a.state.serviceStatus) != 2 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { diff --git a/agent/session_endpoint.go b/agent/session_endpoint.go index 8a5e2a311..685b30bc9 100644 --- a/agent/session_endpoint.go +++ b/agent/session_endpoint.go @@ -6,7 +6,6 @@ import ( "strings" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/types" ) @@ -39,7 +38,7 @@ func (s *HTTPServer) SessionCreate(resp http.ResponseWriter, req *http.Request) Op: structs.SessionCreate, Session: structs.Session{ Node: s.agent.config.NodeName, - Checks: []types.CheckID{consul.SerfCheckID}, + Checks: []types.CheckID{structs.SerfCheckID}, LockDelay: 15 * time.Second, Behavior: structs.SessionKeysRelease, TTL: "", diff --git a/agent/session_endpoint_test.go b/agent/session_endpoint_test.go index 4dc098cfd..ce3b5a6f7 100644 --- a/agent/session_endpoint_test.go +++ b/agent/session_endpoint_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/types" @@ -43,7 +42,7 @@ func TestSessionCreate(t *testing.T) { raw := map[string]interface{}{ "Name": "my-cool-session", "Node": a.Config.NodeName, - "Checks": []types.CheckID{consul.SerfCheckID, "consul"}, + "Checks": []types.CheckID{structs.SerfCheckID, "consul"}, "LockDelay": "20s", } enc.Encode(raw) @@ -89,7 +88,7 @@ func TestSessionCreateDelete(t *testing.T) { raw := map[string]interface{}{ "Name": "my-cool-session", "Node": a.Config.NodeName, - "Checks": []types.CheckID{consul.SerfCheckID, "consul"}, + "Checks": []types.CheckID{structs.SerfCheckID, "consul"}, "LockDelay": "20s", "Behavior": structs.SessionKeysDelete, } diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index 56249baaf..285fa833b 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -46,8 +46,8 @@ Tokens are bound to a set of rules that control which Consul resources the token has access to. Policies can be defined in either a whitelist or blacklist mode depending on the configuration of [`acl_default_policy`](/docs/agent/options.html#acl_default_policy). 
If the default -policy is to "deny all" actions, then token rules can be set to whitelist specific -actions. In the inverse, the "allow all" default behavior is a blacklist where rules +policy is to "deny" all actions, then token rules can be set to whitelist specific +actions. In the inverse, the "allow" all default behavior is a blacklist where rules are used to prohibit actions. By default, Consul will allow all actions. The following table summarizes the ACL policies that are available for constructing @@ -100,10 +100,17 @@ the cache TTL is an upper bound on the staleness of policy that is enforced. It possible to set a zero TTL, but this has adverse performance impacts, as every request requires refreshing the policy via an RPC call. -#### Enabling ACLs +During an outage of the ACL datacenter, or loss of connectivity, the cache will be +used as long as the TTL is valid, or the cache may be extended if the +[`acl_down_policy`](/docs/agent/options.html#acl_down_policy) is set accordingly. +This configuration also allows the ACL system to fail open or closed. +[ACL replication](#replication) is also available to allow for the full set of ACL +tokens to be replicated for use during an outage. -Enabling ACLs is done by setting up the following configuration options. These are -marked as to whether they are set on servers, clients, or both. +#### Configuring ACLs + +ACLs are configured using several different configuration options. These are marked +as to whether they are set on servers, clients, or both. | Configuration Option | Servers | Clients | Purpose | | -------------------- | ------- | ------- | ------- | @@ -122,13 +129,43 @@ system, or accessing Consul in special situations: | Special Token | Servers | Clients | Purpose | | ------------- | ------- | ------- | ------- | | [`acl_agent_master_token`](/docs/agent/options.html#acl_agent_master_token) | `OPTIONAL` | `OPTIONAL` | Special token that can be used to access [Agent API](/api/agent.html) when the ACL datacenter isn't available, or servers are offline (for clients); used for setting up the cluster such as doing initial join operations | -| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self update its registration information | -| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below. 
| +| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self update its registration information, and also needs `service` read access for all services that will be registered with that node for [anti-entropy](/docs/internals/anti-entropy.html) syncing | +| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below | | [`acl_token`](/docs/agent/options.html#acl_token) | `OPTIONAL` | `OPTIONAL` | Default token to use for client requests where no token is supplied; this is often configured with read-only access to services to enable DNS service discovery on agents | -Bootstrapping the ACL system is done by providing an initial -[`acl_master_token`](/docs/agent/options.html#acl_master_token) which will be created -as a "management" type token if it does not exist. The +#### Bootstrapping ACLs + +Bootstrapping ACLs on a new cluster requires a few steps, outlined in the example in this +section. + +**Enable ACLs on the Consul Servers** + +The first step for bootstrapping ACLs is to enable ACLs on the Consul servers in the ACL +datacenter. In this example, we are configuring the following: + +1. An ACL datacenter of "dc1", which is where these servers are +2. An ACL master token of "b1gs33cr3t" +3. A default policy of "deny" which means we are in whitelist mode +4. A down policy of "extend-cache" which means that we will ignore token TTLs during an + outage + +Here's the corresponding JSON configuration file: + +```json +{ + "acl_datacenter": "dc1", + "acl_master_token": "b1gs33cr3t", + "acl_default_policy": "deny", + "acl_down_policy": "extend-cache" +} +``` + +The servers will need to be restarted to load the new configuration. Please take care +to start the servers one at a time, and ensure each server has joined and is operating +correctly before starting another. + +The [`acl_master_token`](/docs/agent/options.html#acl_master_token) will be created +as a "management" type token automatically. The [`acl_master_token`](/docs/agent/options.html#acl_master_token) is only installed when a server acquires cluster leadership. If you would like to install or change the [`acl_master_token`](/docs/agent/options.html#acl_master_token), set the new value for @@ -138,6 +175,227 @@ for all servers. Once this is done, restart the current leader to force a leader Once the ACL system is bootstrapped, ACL tokens can be managed through the [ACL API](/api/acl.html). +**Create an Agent Token** + +After the servers are restarted above, you will see new errors in the logs of the Consul +servers related to permission denied errors: + +``` +2017/07/08 23:38:24 [WARN] agent: Node info update blocked by ACLs +2017/07/08 23:38:44 [WARN] agent: Coordinate update blocked by ACLs +``` + +These errors are because the agent doesn't yet have a properly configured +[`acl_agent_token`](/docs/agent/options.html#acl_agent_token) that it can use for its +own internal operations like updating its node information in the catalog, and performing +[anti-entropy](/docs/internals/anti-entropy.html) syncing. 
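In Consul's rule language (described later in this guide under Rule Specification), the policy this token needs grants write access to every node name and read access to every service, since an empty prefix matches everything. Written out as HCL rather than as the escaped JSON string used in the API call below, it looks like this:

```
node "" {
  policy = "write"
}

service "" {
  policy = "read"
}
```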
We can create a token using the +ACL API, and the ACL master token we set in the previous step: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "Name": "Agent Token", + "Type": "client", + "Rules": "node \"\" { policy = \"write\" } service \"\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/create + +{"ID":"fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1"} +``` + +The returned value is the newly-created token. We can now add this to our Consul server +configuration and restart the servers once more to apply it: + +```json +{ + "acl_datacenter": "dc1", + "acl_master_token": "b1gs33cr3t", + "acl_default_policy": "deny", + "acl_down_policy": "extend-cache", + "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1" +} +``` + +With that ACL agent token set, the servers will be able to sync themselves with the +catalog: + +``` +2017/07/08 23:42:59 [INFO] agent: Synced node info +``` + +**Enable ACLs on the Consul Clients** + +Since ACL enforcement also occurs on the Consul clients, we need to also restart them +with a configuration file that enables ACLs: + +```json +{ + "acl_datacenter": "dc1", + "acl_down_policy": "extend-cache", + "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1" +} +``` + +We used the same ACL agent token that we created for the servers, which will work since +it was not specific to any node or set of service prefixes. In a more locked-down +environment it is recommended that each client get an ACL agent token with `node` write +privileges for just its own node name prefix, and `service` read privileges for just the +service prefixes expected to be registered on that client. + +[Anti-entropy](/docs/internals/anti-entropy.html) syncing requires the ACL agent token +to have `service` read privileges for all services that may be registered with the agent, +so generally an empty `service` prefix can be used, as shown in the example. + +Clients will report similar permission denied errors until they are restarted with an ACL +agent token. + +**Set an Anonymous Policy (Optional)** + +At this point ACLs are bootstrapped with ACL agent tokens configured, but there are no +other policies set up. Even basic operations like `consul members` will be restricted +by the ACL default policy of "deny": + +``` +$ consul members +``` + +We don't get an error since the ACL has filtered what we see, and we aren't allowed to +see any nodes by default. + +If we supply the token we created above we will be able to see a listing of nodes because +it has write privileges to an empty `node` prefix, meaning it has access to all nodes: + +``` +$ CONSUL_HTTP_TOKEN=fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1 consul members +Node Address Status Type Build Protocol DC +node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1 +node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1 +``` + +It's pretty common in many environments to allow listing of all nodes, even without a +token. The policies associated with the special anonymous token can be updated to +configure Consul's behavior when no token is supplied. The anonymous token is managed +like any other ACL token, except that `anonymous` is used for the ID. 
In this example +we will give the anonymous token read privileges for all nodes: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "ID": "anonymous", + "Type": "client", + "Rules": "node \"\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/update + +{"ID":"anonymous"} +``` + +The anonymous token is implicitly used if no token is supplied, so now we can run +`consul members` without supplying a token and we will be able to see the nodes: + +``` +$ consul members +Node Address Status Type Build Protocol DC +node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1 +node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1 +``` + +The anonymous token is also used for DNS lookups since there's no way to pass a +token as part of a DNS request. Here's an example lookup for the "consul" service: + +``` +$ dig @127.0.0.1 -p 8600 consul.service.consul + +; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul +; (1 server found) +;; global options: +cmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NXDOMAIN, id: 9648 +;; flags: qr aa rd; QUERY: 1, ANSWER: 0, AUTHORITY: 1, ADDITIONAL: 0 +;; WARNING: recursion requested but not available + +;; QUESTION SECTION: +;consul.service.consul. IN A + +;; AUTHORITY SECTION: +consul. 0 IN SOA ns.consul. postmaster.consul. 1499584110 3600 600 86400 0 + +;; Query time: 2 msec +;; SERVER: 127.0.0.1#8600(127.0.0.1) +;; WHEN: Sun Jul 9 00:08:30 2017 +;; MSG SIZE rcvd: 89 +``` + +Now we get an `NXDOMAIN` error because the anonymous token doesn't have access to the +"consul" service. Let's add that to the anonymous token's policy: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "ID": "anonymous", + "Type": "client", + "Rules": "node \"\" { policy = \"read\" } service \"consul\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/update + +{"ID":"anonymous"} +``` + +With that new policy in place, the DNS lookup will succeed: + +``` +$ dig @127.0.0.1 -p 8600 consul.service.consul + +; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul +; (1 server found) +;; global options: +cmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 46006 +;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 0 +;; WARNING: recursion requested but not available + +;; QUESTION SECTION: +;consul.service.consul. IN A + +;; ANSWER SECTION: +consul.service.consul. 0 IN A 127.0.0.1 + +;; Query time: 0 msec +;; SERVER: 127.0.0.1#8600(127.0.0.1) +;; WHEN: Sun Jul 9 00:11:14 2017 +;; MSG SIZE rcvd: 55 +``` + +The next section shows an alternative to the anonymous token. + +**Set Agent-specific Default Tokens (Optional)** + +An alternative to the anonymous token is the [`acl_token`](/docs/agent/options.html#acl_token) +configuration item. When a request is made to a particular Consul agent and no token is +supplied, the [`acl_token`](/docs/agent/options.html#acl_token) will be used for the token, +instead of being left empty which would normally invoke the anonymous token. + +This behaves very similarly to the anonymous token, but can be configured differently on each +agent, if desired. For example, this allows more fine grained control of what DNS requests a +given agent can service, or can give the agent read access to some key-value store prefixes by +default. + +If using [`acl_token`](/docs/agent/options.html#acl_token), then it's likely the anonymous +token will have a more restrictive policy than shown in the examples here. 
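As a minimal sketch of this approach (the `acl_token` value is a placeholder for a token you would create first with a suitably narrow policy; the other settings match the client configuration shown earlier), an agent's configuration might look like:

```json
{
  "acl_datacenter": "dc1",
  "acl_down_policy": "extend-cache",
  "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1",
  "acl_token": "<default token for requests that supply no token>"
}
```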
+ +**Next Steps** + +The examples above configure a basic ACL environment with the ability to see all nodes +by default, and limited access to just the "consul" service. The [ACL API](/api/acl.html) +can be used to create application tokens scoped to their intended use, and to create +more specific ACL agent tokens for each agent's expected role. + ## Rule Specification A core part of the ACL system is the rule language which is used to describe the policy