diff --git a/agent/agent.go b/agent/agent.go index 6001a4ee3..e81b91fc3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -309,16 +309,6 @@ func (a *Agent) Start() error { a.delegate = server a.state.delegate = server - - // Automatically register the "consul" service on server nodes - consulService := structs.NodeService{ - Service: consul.ConsulServiceName, - ID: consul.ConsulServiceID, - Port: c.Ports.Server, - Tags: []string{}, - } - - a.state.AddService(&consulService, c.GetTokenForAgent()) } else { client, err := consul.NewClientLogger(consulCfg, a.logger) if err != nil { @@ -1309,17 +1299,17 @@ func (a *Agent) sendCoordinate() { members := a.LANMembers() grok, err := consul.CanServersUnderstandProtocol(members, 3) if err != nil { - a.logger.Printf("[ERR] agent: failed to check servers: %s", err) + a.logger.Printf("[ERR] agent: Failed to check servers: %s", err) continue } if !grok { - a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded") + a.logger.Printf("[DEBUG] agent: Skipping coordinate updates until servers are upgraded") continue } c, err := a.GetLANCoordinate() if err != nil { - a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err) + a.logger.Printf("[ERR] agent: Failed to get coordinate: %s", err) continue } @@ -1331,7 +1321,11 @@ func (a *Agent) sendCoordinate() { } var reply struct{} if err := a.RPC("Coordinate.Update", &req, &reply); err != nil { - a.logger.Printf("[ERR] agent: coordinate update error: %s", err) + if strings.Contains(err.Error(), permissionDenied) { + a.logger.Printf("[WARN] agent: Coordinate update blocked by ACLs") + } else { + a.logger.Printf("[ERR] agent: Coordinate update error: %v", err) + } continue } case <-a.shutdownCh: @@ -1561,13 +1555,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che // RemoveService is used to remove a service entry. 
// The agent will make a best effort to ensure it is deregistered func (a *Agent) RemoveService(serviceID string, persist bool) error { - // Protect "consul" service from deletion by a user - if _, ok := a.delegate.(*consul.Server); ok && serviceID == consul.ConsulServiceID { - return fmt.Errorf( - "Deregistering the %s service is not allowed", - consul.ConsulServiceID) - } - // Validate ServiceID if serviceID == "" { return fmt.Errorf("ServiceID missing") @@ -2069,9 +2056,6 @@ func (a *Agent) loadServices(conf *Config) error { // known to the local agent. func (a *Agent) unloadServices() error { for _, service := range a.state.Services() { - if service.ID == consul.ConsulServiceID { - continue - } if err := a.RemoveService(service.ID, false); err != nil { return fmt.Errorf("Failed deregistering service '%s': %v", service.ID, err) } diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 401248539..84b9d3a7d 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -57,7 +57,7 @@ func TestAgent_Services(t *testing.T) { t.Fatalf("Err: %v", err) } val := obj.(map[string]*structs.NodeService) - if len(val) != 2 { + if len(val) != 1 { t.Fatalf("bad services: %v", obj) } if val["mysql"].Port != 5000 { @@ -70,6 +70,14 @@ func TestAgent_Services_ACLFilter(t *testing.T) { a := NewTestAgent(t.Name(), TestACLConfig()) defer a.Shutdown() + srv1 := &structs.NodeService{ + ID: "mysql", + Service: "mysql", + Tags: []string{"master"}, + Port: 5000, + } + a.state.AddService(srv1, "") + t.Run("no token", func(t *testing.T) { req, _ := http.NewRequest("GET", "/v1/agent/services", nil) obj, err := a.srv.AgentServices(nil, req) diff --git a/agent/agent_test.go b/agent/agent_test.go index 4b9470a94..97f772cf4 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -492,11 +492,6 @@ func TestAgent_RemoveService(t *testing.T) { t.Fatalf("err: %v", err) } - // Remove the consul service - if err := a.RemoveService("consul", false); err == 
nil { - t.Fatalf("should have errored") - } - // Remove without an ID if err := a.RemoveService("", false); err == nil { t.Fatalf("should have errored") @@ -882,34 +877,6 @@ func TestAgent_updateTTLCheck(t *testing.T) { } } -func TestAgent_ConsulService(t *testing.T) { - t.Parallel() - a := NewTestAgent(t.Name(), nil) - defer a.Shutdown() - - // Consul service is registered - services := a.state.Services() - if _, ok := services[consul.ConsulServiceID]; !ok { - t.Fatalf("%s service should be registered", consul.ConsulServiceID) - } - - // todo(fs): data race - func() { - a.state.Lock() - defer a.state.Unlock() - - // Perform anti-entropy on consul service - if err := a.state.syncService(consul.ConsulServiceID); err != nil { - t.Fatalf("err: %s", err) - } - }() - - // Consul service should be in sync - if !a.state.serviceStatus[consul.ConsulServiceID].inSync { - t.Fatalf("%s service should be in sync", consul.ConsulServiceID) - } -} - func TestAgent_PersistService(t *testing.T) { t.Parallel() cfg := TestConfig() @@ -1432,19 +1399,8 @@ func TestAgent_unloadServices(t *testing.T) { if err := a.unloadServices(); err != nil { t.Fatalf("err: %s", err) } - - // Make sure it was unloaded and the consul service remains - found = false - for id := range a.state.Services() { - if id == svc.ID { - t.Fatalf("should have unloaded services") - } - if id == consul.ConsulServiceID { - found = true - } - } - if !found { - t.Fatalf("consul service should not be removed") + if len(a.state.Services()) != 0 { + t.Fatalf("should have unloaded services") } } diff --git a/agent/consul/acl.go b/agent/consul/acl.go index 7845ab53d..ead3a25be 100644 --- a/agent/consul/acl.go +++ b/agent/consul/acl.go @@ -341,7 +341,7 @@ func (f *aclFilter) allowService(service string) bool { return true } - if !f.enforceVersion8 && service == ConsulServiceID { + if !f.enforceVersion8 && service == structs.ConsulServiceID { return true } diff --git a/agent/consul/catalog_endpoint.go 
b/agent/consul/catalog_endpoint.go index 1e75b16b6..db7b6c7bf 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -64,7 +64,7 @@ func (c *Catalog) Register(args *structs.RegisterRequest, reply *struct{}) error // is going away after version 0.8). We check this same policy // later if version 0.8 is enabled, so we can eventually just // delete this and do all the ACL checks down there. - if args.Service.Service != ConsulServiceName { + if args.Service.Service != structs.ConsulServiceName { if acl != nil && !acl.ServiceWrite(args.Service.Service) { return errPermissionDenied } diff --git a/agent/consul/health_endpoint_test.go b/agent/consul/health_endpoint_test.go index be64a0310..90bb49163 100644 --- a/agent/consul/health_endpoint_test.go +++ b/agent/consul/health_endpoint_test.go @@ -54,7 +54,7 @@ func TestHealth_ChecksInState(t *testing.T) { if checks[0].Name != "memory utilization" { t.Fatalf("Bad: %v", checks[0]) } - if checks[1].CheckID != SerfCheckID { + if checks[1].CheckID != structs.SerfCheckID { t.Fatalf("Bad: %v", checks[1]) } } diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 2f42c5293..ba77b4492 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -18,14 +18,8 @@ import ( ) const ( - SerfCheckID types.CheckID = "serfHealth" - SerfCheckName = "Serf Health Status" - SerfCheckAliveOutput = "Agent alive and reachable" - SerfCheckFailedOutput = "Agent not live or unreachable" - ConsulServiceID = "consul" - ConsulServiceName = "consul" - newLeaderEvent = "consul:new-leader" - barrierWriteTimeout = 2 * time.Minute + newLeaderEvent = "consul:new-leader" + barrierWriteTimeout = 2 * time.Minute ) // monitorLeadership is used to monitor if we acquire or lose our role @@ -334,7 +328,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error { } for _, check := range checks { // Ignore any non serf checks - if check.CheckID != SerfCheckID { + if check.CheckID != structs.SerfCheckID { 
continue } @@ -359,7 +353,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error { } serverPort := 0 for _, service := range services.Services { - if service.ID == ConsulServiceID { + if service.ID == structs.ConsulServiceID { serverPort = service.Port break } @@ -430,8 +424,8 @@ func (s *Server) handleAliveMember(member serf.Member) error { var service *structs.NodeService if valid, parts := agent.IsConsulServer(member); valid { service = &structs.NodeService{ - ID: ConsulServiceID, - Service: ConsulServiceName, + ID: structs.ConsulServiceID, + Service: structs.ConsulServiceName, Port: parts.Port, } @@ -473,7 +467,7 @@ func (s *Server) handleAliveMember(member serf.Member) error { return err } for _, check := range checks { - if check.CheckID == SerfCheckID && check.Status == api.HealthPassing { + if check.CheckID == structs.SerfCheckID && check.Status == api.HealthPassing { return nil } } @@ -490,10 +484,10 @@ AFTER_CHECK: Service: service, Check: &structs.HealthCheck{ Node: member.Name, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthPassing, - Output: SerfCheckAliveOutput, + Output: structs.SerfCheckAliveOutput, }, // If there's existing information about the node, do not @@ -520,7 +514,7 @@ func (s *Server) handleFailedMember(member serf.Member) error { return err } for _, check := range checks { - if check.CheckID == SerfCheckID && check.Status == api.HealthCritical { + if check.CheckID == structs.SerfCheckID && check.Status == api.HealthCritical { return nil } } @@ -535,10 +529,10 @@ func (s *Server) handleFailedMember(member serf.Member) error { Address: member.Addr.String(), Check: &structs.HealthCheck{ Node: member.Name, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, - Output: SerfCheckFailedOutput, + Output: structs.SerfCheckFailedOutput, }, // If there's existing 
information about the node, do not diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index 35a98d45b..052f86d9e 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -53,10 +53,10 @@ func TestLeader_RegisterMember(t *testing.T) { if len(checks) != 1 { t.Fatalf("client missing check") } - if checks[0].CheckID != SerfCheckID { + if checks[0].CheckID != structs.SerfCheckID { t.Fatalf("bad check: %v", checks[0]) } - if checks[0].Name != SerfCheckName { + if checks[0].Name != structs.SerfCheckName { t.Fatalf("bad check: %v", checks[0]) } if checks[0].Status != api.HealthPassing { @@ -125,10 +125,10 @@ func TestLeader_FailedMember(t *testing.T) { if len(checks) != 1 { t.Fatalf("client missing check") } - if checks[0].CheckID != SerfCheckID { + if checks[0].CheckID != structs.SerfCheckID { t.Fatalf("bad check: %v", checks[0]) } - if checks[0].Name != SerfCheckName { + if checks[0].Name != structs.SerfCheckName { t.Fatalf("bad check: %v", checks[0]) } @@ -270,8 +270,8 @@ func TestLeader_Reconcile_ReapMember(t *testing.T) { Address: "127.1.1.1", Check: &structs.HealthCheck{ Node: "no-longer-around", - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, }, WriteRequest: structs.WriteRequest{ @@ -378,8 +378,8 @@ func TestLeader_Reconcile_Races(t *testing.T) { NodeMeta: map[string]string{"hello": "world"}, Check: &structs.HealthCheck{ Node: c1.config.NodeName, - CheckID: SerfCheckID, - Name: SerfCheckName, + CheckID: structs.SerfCheckID, + Name: structs.SerfCheckName, Status: api.HealthCritical, Output: "", }, diff --git a/agent/consul/structs/catalog.go b/agent/consul/structs/catalog.go new file mode 100644 index 000000000..b6b443f6f --- /dev/null +++ b/agent/consul/structs/catalog.go @@ -0,0 +1,21 @@ +package structs + +import ( + "github.com/hashicorp/consul/types" +) + +// These are used to manage the built-in "serfHealth" check that's attached 
+// to every node in the catalog. +const ( + SerfCheckID types.CheckID = "serfHealth" + SerfCheckName = "Serf Health Status" + SerfCheckAliveOutput = "Agent alive and reachable" + SerfCheckFailedOutput = "Agent not live or unreachable" +) + +// These are used to manage the "consul" service that's attached to every Consul +// server node in the catalog. +const ( + ConsulServiceID = "consul" + ConsulServiceName = "consul" +) diff --git a/agent/local.go b/agent/local.go index f359f40da..d71ca2211 100644 --- a/agent/local.go +++ b/agent/local.go @@ -9,7 +9,6 @@ import ( "sync/atomic" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" @@ -483,6 +482,11 @@ func (l *localState) setSyncState() error { // If we don't have the service locally, deregister it existing, ok := l.services[id] if !ok { + // The consul service is created automatically, and does + // not need to be deregistered. + if id == structs.ConsulServiceID { + continue + } l.serviceStatus[id] = syncStatus{inSync: false} continue } @@ -517,8 +521,8 @@ func (l *localState) setSyncState() error { existing, ok := l.checks[id] if !ok { // The Serf check is created automatically, and does not - // need to be registered - if id == consul.SerfCheckID { + // need to be deregistered. 
+ if id == structs.SerfCheckID { continue } l.checkStatus[id] = syncStatus{inSync: false} diff --git a/agent/local_test.go b/agent/local_test.go index 5f5e19bfd..de66480df 100644 --- a/agent/local_test.go +++ b/agent/local_test.go @@ -161,7 +161,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { if !reflect.DeepEqual(serv, srv6) { r.Fatalf("bad: %v %v", serv, srv6) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -173,10 +173,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 6 { + if len(a.state.services) != 5 { r.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 6 { + if len(a.state.serviceStatus) != 5 { r.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -222,7 +222,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) { if !reflect.DeepEqual(serv, srv6) { r.Fatalf("bad: %v %v", serv, srv6) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -234,10 +234,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 5 { + if len(a.state.services) != 4 { r.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 5 { + if len(a.state.serviceStatus) != 4 { r.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -333,7 +333,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) { !reflect.DeepEqual(serv.Tags, []string{"tag2"}) { r.Fatalf("bad: %v %v", serv, srv2) } - case "consul": + case structs.ConsulServiceID: // ignore default: r.Fatalf("unexpected service: %v", id) @@ -575,7 +575,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { if !reflect.DeepEqual(serv, srv2) { t.Fatalf("bad: %#v %#v", serv, srv2) } - case "consul": + case 
structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -588,10 +588,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 3 { + if len(a.state.services) != 2 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 3 { + if len(a.state.serviceStatus) != 2 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -634,7 +634,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { t.Fatalf("should not be permitted") case "api": t.Fatalf("should be deleted") - case "consul": + case structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -647,10 +647,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 2 { + if len(a.state.services) != 1 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 2 { + if len(a.state.serviceStatus) != 1 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { @@ -975,7 +975,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { if !reflect.DeepEqual(serv, srv2) { t.Fatalf("bad: %#v %#v", serv, srv2) } - case "consul": + case structs.ConsulServiceID: // ignore default: t.Fatalf("unexpected service: %v", id) @@ -988,10 +988,10 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { defer a.state.RUnlock() // Check the local state - if len(a.state.services) != 3 { + if len(a.state.services) != 2 { t.Fatalf("bad: %v", a.state.services) } - if len(a.state.serviceStatus) != 3 { + if len(a.state.serviceStatus) != 2 { t.Fatalf("bad: %v", a.state.serviceStatus) } for name, status := range a.state.serviceStatus { diff --git a/agent/session_endpoint.go b/agent/session_endpoint.go index 8a5e2a311..685b30bc9 100644 --- a/agent/session_endpoint.go +++ 
b/agent/session_endpoint.go @@ -6,7 +6,6 @@ import ( "strings" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/types" ) @@ -39,7 +38,7 @@ func (s *HTTPServer) SessionCreate(resp http.ResponseWriter, req *http.Request) Op: structs.SessionCreate, Session: structs.Session{ Node: s.agent.config.NodeName, - Checks: []types.CheckID{consul.SerfCheckID}, + Checks: []types.CheckID{structs.SerfCheckID}, LockDelay: 15 * time.Second, Behavior: structs.SessionKeysRelease, TTL: "", diff --git a/agent/session_endpoint_test.go b/agent/session_endpoint_test.go index 4dc098cfd..ce3b5a6f7 100644 --- a/agent/session_endpoint_test.go +++ b/agent/session_endpoint_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/hashicorp/consul/agent/consul" "github.com/hashicorp/consul/agent/consul/structs" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/types" @@ -43,7 +42,7 @@ func TestSessionCreate(t *testing.T) { raw := map[string]interface{}{ "Name": "my-cool-session", "Node": a.Config.NodeName, - "Checks": []types.CheckID{consul.SerfCheckID, "consul"}, + "Checks": []types.CheckID{structs.SerfCheckID, "consul"}, "LockDelay": "20s", } enc.Encode(raw) @@ -89,7 +88,7 @@ func TestSessionCreateDelete(t *testing.T) { raw := map[string]interface{}{ "Name": "my-cool-session", "Node": a.Config.NodeName, - "Checks": []types.CheckID{consul.SerfCheckID, "consul"}, + "Checks": []types.CheckID{structs.SerfCheckID, "consul"}, "LockDelay": "20s", "Behavior": structs.SessionKeysDelete, } diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index 56249baaf..285fa833b 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -46,8 +46,8 @@ Tokens are bound to a set of rules that control which Consul resources the token has access to. 
Policies can be defined in either a whitelist or blacklist mode depending on the configuration of [`acl_default_policy`](/docs/agent/options.html#acl_default_policy). If the default -policy is to "deny all" actions, then token rules can be set to whitelist specific -actions. In the inverse, the "allow all" default behavior is a blacklist where rules +policy is to "deny" all actions, then token rules can be set to whitelist specific +actions. In the inverse, the "allow" all default behavior is a blacklist where rules are used to prohibit actions. By default, Consul will allow all actions. The following table summarizes the ACL policies that are available for constructing @@ -100,10 +100,17 @@ the cache TTL is an upper bound on the staleness of policy that is enforced. It possible to set a zero TTL, but this has adverse performance impacts, as every request requires refreshing the policy via an RPC call. -#### Enabling ACLs +During an outage of the ACL datacenter, or loss of connectivity, the cache will be +used as long as the TTL is valid, or the cache may be extended if the +[`acl_down_policy`](/docs/agent/options.html#acl_down_policy) is set accordingly. +This configuration also allows the ACL system to fail open or closed. +[ACL replication](#replication) is also available to allow for the full set of ACL +tokens to be replicated for use during an outage. -Enabling ACLs is done by setting up the following configuration options. These are -marked as to whether they are set on servers, clients, or both. +#### Configuring ACLs + +ACLs are configured using several different configuration options. These are marked +as to whether they are set on servers, clients, or both. 
| Configuration Option | Servers | Clients | Purpose | | -------------------- | ------- | ------- | ------- | @@ -122,13 +129,43 @@ system, or accessing Consul in special situations: | Special Token | Servers | Clients | Purpose | | ------------- | ------- | ------- | ------- | | [`acl_agent_master_token`](/docs/agent/options.html#acl_agent_master_token) | `OPTIONAL` | `OPTIONAL` | Special token that can be used to access [Agent API](/api/agent.html) when the ACL datacenter isn't available, or servers are offline (for clients); used for setting up the cluster such as doing initial join operations | -| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self update its registration information | -| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below. 
| +| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self update its registration information, and also needs `service` read access for all services that will be registered with that node for [anti-entropy](/docs/internals/anti-entropy.html) syncing | +| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below | | [`acl_token`](/docs/agent/options.html#acl_token) | `OPTIONAL` | `OPTIONAL` | Default token to use for client requests where no token is supplied; this is often configured with read-only access to services to enable DNS service discovery on agents | -Bootstrapping the ACL system is done by providing an initial -[`acl_master_token`](/docs/agent/options.html#acl_master_token) which will be created -as a "management" type token if it does not exist. The +#### Bootstrapping ACLs + +Bootstrapping ACLs on a new cluster requires a few steps, outlined in the example in this +section. + +**Enable ACLs on the Consul Servers** + +The first step for bootstrapping ACLs is to enable ACLs on the Consul servers in the ACL +datacenter. In this example, we are configuring the following: + +1. An ACL datacenter of "dc1", which is where these servers are +2. An ACL master token of "b1gs33cr3t" +3. A default policy of "deny" which means we are in whitelist mode +4. A down policy of "extend-cache" which means that we will ignore token TTLs during an + outage + +Here's the corresponding JSON configuration file: + +```json +{ + "acl_datacenter": "dc1", + "acl_master_token": "b1gs33cr3t", + "acl_default_policy": "deny", + "acl_down_policy": "extend-cache" +} +``` + +The servers will need to be restarted to load the new configuration. 
Please take care +to start the servers one at a time, and ensure each server has joined and is operating +correctly before starting another. + +The [`acl_master_token`](/docs/agent/options.html#acl_master_token) will be created +as a "management" type token automatically. The [`acl_master_token`](/docs/agent/options.html#acl_master_token) is only installed when a server acquires cluster leadership. If you would like to install or change the [`acl_master_token`](/docs/agent/options.html#acl_master_token), set the new value for @@ -138,6 +175,227 @@ for all servers. Once this is done, restart the current leader to force a leader Once the ACL system is bootstrapped, ACL tokens can be managed through the [ACL API](/api/acl.html). +**Create an Agent Token** + +After the servers are restarted above, you will see new warnings in the logs of the Consul +servers caused by permission denied errors: + +``` +2017/07/08 23:38:24 [WARN] agent: Node info update blocked by ACLs +2017/07/08 23:38:44 [WARN] agent: Coordinate update blocked by ACLs +``` + +These warnings appear because the agent doesn't yet have a properly configured +[`acl_agent_token`](/docs/agent/options.html#acl_agent_token) that it can use for its +own internal operations like updating its node information in the catalog, and performing +[anti-entropy](/docs/internals/anti-entropy.html) syncing. We can create a token using the +ACL API, and the ACL master token we set in the previous step: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "Name": "Agent Token", + "Type": "client", + "Rules": "node \"\" { policy = \"write\" } service \"\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/create + +{"ID":"fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1"} +``` + +The returned value is the newly-created token. 
We can now add this to our Consul server +configuration and restart the servers once more to apply it: + +```json +{ + "acl_datacenter": "dc1", + "acl_master_token": "b1gs33cr3t", + "acl_default_policy": "deny", + "acl_down_policy": "extend-cache", + "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1" +} +``` + +With that ACL agent token set, the servers will be able to sync themselves with the +catalog: + +``` +2017/07/08 23:42:59 [INFO] agent: Synced node info +``` + +**Enable ACLs on the Consul Clients** + +Since ACL enforcement also occurs on the Consul clients, we need to restart them as well, +using a configuration file that enables ACLs: + +```json +{ + "acl_datacenter": "dc1", + "acl_down_policy": "extend-cache", + "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1" +} +``` + +We used the same ACL agent token that we created for the servers, which will work since +it was not specific to any node or set of service prefixes. In a more locked-down +environment it is recommended that each client get an ACL agent token with `node` write +privileges for just its own node name prefix, and `service` read privileges for just the +service prefixes expected to be registered on that client. + +[Anti-entropy](/docs/internals/anti-entropy.html) syncing requires the ACL agent token +to have `service` read privileges for all services that may be registered with the agent, +so generally an empty `service` prefix can be used, as shown in the example. + +Clients will report similar permission denied errors until they are restarted with an ACL +agent token. + +**Set an Anonymous Policy (Optional)** + +At this point ACLs are bootstrapped with ACL agent tokens configured, but there are no +other policies set up. Even basic operations like `consul members` will be restricted +by the ACL default policy of "deny": + +``` +$ consul members +``` + +We don't get an error since the ACL has filtered what we see, and we aren't allowed to +see any nodes by default. 
+ +If we supply the token we created above we will be able to see a listing of nodes because +it has write privileges to an empty `node` prefix, meaning it has access to all nodes: + +``` +$ CONSUL_HTTP_TOKEN=fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1 consul members +Node Address Status Type Build Protocol DC +node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1 +node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1 +``` + +It's pretty common in many environments to allow listing of all nodes, even without a +token. The policies associated with the special anonymous token can be updated to +configure Consul's behavior when no token is supplied. The anonymous token is managed +like any other ACL token, except that `anonymous` is used for the ID. In this example +we will give the anonymous token read privileges for all nodes: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "ID": "anonymous", + "Type": "client", + "Rules": "node \"\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/update + +{"ID":"anonymous"} +``` + +The anonymous token is implicitly used if no token is supplied, so now we can run +`consul members` without supplying a token and we will be able to see the nodes: + +``` +$ consul members +Node Address Status Type Build Protocol DC +node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1 +node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1 +``` + +The anonymous token is also used for DNS lookups since there's no way to pass a +token as part of a DNS request. Here's an example lookup for the "consul" service: + +``` +$ dig @127.0.0.1 -p 8600 consul.service.consul + +; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul +; (1 server found) +;; global options: +cmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NXDOMAIN, id: 9648 +;; flags: qr aa rd; QUERY: 1, ANSWER: 0, AUTHORITY: 1, ADDITIONAL: 0 +;; WARNING: recursion requested but not available + +;; QUESTION SECTION: +;consul.service.consul. 
IN A + +;; AUTHORITY SECTION: +consul. 0 IN SOA ns.consul. postmaster.consul. 1499584110 3600 600 86400 0 + +;; Query time: 2 msec +;; SERVER: 127.0.0.1#8600(127.0.0.1) +;; WHEN: Sun Jul 9 00:08:30 2017 +;; MSG SIZE rcvd: 89 +``` + +Now we get an `NXDOMAIN` error because the anonymous token doesn't have access to the +"consul" service. Let's add that to the anonymous token's policy: + +``` +$ curl \ + --request PUT \ + --header "X-Consul-Token: b1gs33cr3t" \ + --data \ +'{ + "ID": "anonymous", + "Type": "client", + "Rules": "node \"\" { policy = \"read\" } service \"consul\" { policy = \"read\" }" +}' http://127.0.0.1:8500/v1/acl/update + +{"ID":"anonymous"} +``` + +With that new policy in place, the DNS lookup will succeed: + +``` +$ dig @127.0.0.1 -p 8600 consul.service.consul + +; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul +; (1 server found) +;; global options: +cmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 46006 +;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 0 +;; WARNING: recursion requested but not available + +;; QUESTION SECTION: +;consul.service.consul. IN A + +;; ANSWER SECTION: +consul.service.consul. 0 IN A 127.0.0.1 + +;; Query time: 0 msec +;; SERVER: 127.0.0.1#8600(127.0.0.1) +;; WHEN: Sun Jul 9 00:11:14 2017 +;; MSG SIZE rcvd: 55 +``` + +The next section shows an alternative to the anonymous token. + +**Set Agent-specific Default Tokens (Optional)** + +An alternative to the anonymous token is the [`acl_token`](/docs/agent/options.html#acl_token) +configuration item. When a request is made to a particular Consul agent and no token is +supplied, the [`acl_token`](/docs/agent/options.html#acl_token) will be used for the token, +instead of being left empty which would normally invoke the anonymous token. + +This behaves very similarly to the anonymous token, but can be configured differently on each +agent, if desired. 
For example, this allows more fine-grained control of what DNS requests a +given agent can service, or can give the agent read access to some key-value store prefixes by +default. + +If using [`acl_token`](/docs/agent/options.html#acl_token), then it's likely the anonymous +token will have a more restrictive policy than shown in the examples here. + +**Next Steps** + +The examples above configure a basic ACL environment with the ability to see all nodes +by default, and limited access to just the "consul" service. The [ACL API](/api/acl.html) +can be used to create tokens for applications specific to their intended use, and to create +more specific ACL agent tokens for each agent's expected role. + ## Rule Specification A core part of the ACL system is the rule language which is used to describe the policy