Cleans up version 8 ACLs in the agent and the docs. (#3248)

* Moves magic check and service constants into shared structs package.

* Removes the "consul" service from local state.

Since this service is added by the leader, it doesn't really make sense to
also keep it in local state: doing so requires special ACLs to configure and
a bunch of special cases in the local state logic. Removing it leaves fewer
special cases and makes ACL bootstrapping cleaner.

* Makes the coordinate update ACL log message a warning, similar to other anti-entropy (AE) warnings.

* Adds much more detailed examples for bootstrapping ACLs.

This can hopefully replace https://gist.github.com/slackpad/d89ce0e1cc0802c3c4f2d84932fa3234.
James Phillips 2017-07-13 22:33:47 -07:00 committed by GitHub
parent a3f7faa7e4
commit 8572931afe
14 changed files with 359 additions and 136 deletions

View File

@@ -309,16 +309,6 @@ func (a *Agent) Start() error {
a.delegate = server
a.state.delegate = server
// Automatically register the "consul" service on server nodes
consulService := structs.NodeService{
Service: consul.ConsulServiceName,
ID: consul.ConsulServiceID,
Port: c.Ports.Server,
Tags: []string{},
}
a.state.AddService(&consulService, c.GetTokenForAgent())
} else {
client, err := consul.NewClientLogger(consulCfg, a.logger)
if err != nil {
@@ -1309,17 +1299,17 @@ func (a *Agent) sendCoordinate() {
members := a.LANMembers()
grok, err := consul.CanServersUnderstandProtocol(members, 3)
if err != nil {
a.logger.Printf("[ERR] agent: failed to check servers: %s", err)
a.logger.Printf("[ERR] agent: Failed to check servers: %s", err)
continue
}
if !grok {
a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded")
a.logger.Printf("[DEBUG] agent: Skipping coordinate updates until servers are upgraded")
continue
}
c, err := a.GetLANCoordinate()
if err != nil {
a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err)
a.logger.Printf("[ERR] agent: Failed to get coordinate: %s", err)
continue
}
@@ -1331,7 +1321,11 @@ func (a *Agent) sendCoordinate() {
}
var reply struct{}
if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
if strings.Contains(err.Error(), permissionDenied) {
a.logger.Printf("[WARN] agent: Coordinate update blocked by ACLs")
} else {
a.logger.Printf("[ERR] agent: Coordinate update error: %v", err)
}
continue
}
case <-a.shutdownCh:
@@ -1561,13 +1555,6 @@ func (a *Agent) AddService(service *structs.NodeService, chkTypes []*structs.Che
// RemoveService is used to remove a service entry.
// The agent will make a best effort to ensure it is deregistered
func (a *Agent) RemoveService(serviceID string, persist bool) error {
// Protect "consul" service from deletion by a user
if _, ok := a.delegate.(*consul.Server); ok && serviceID == consul.ConsulServiceID {
return fmt.Errorf(
"Deregistering the %s service is not allowed",
consul.ConsulServiceID)
}
// Validate ServiceID
if serviceID == "" {
return fmt.Errorf("ServiceID missing")
@@ -2069,9 +2056,6 @@ func (a *Agent) loadServices(conf *Config) error {
// known to the local agent.
func (a *Agent) unloadServices() error {
for _, service := range a.state.Services() {
if service.ID == consul.ConsulServiceID {
continue
}
if err := a.RemoveService(service.ID, false); err != nil {
return fmt.Errorf("Failed deregistering service '%s': %v", service.ID, err)
}

View File

@@ -57,7 +57,7 @@ func TestAgent_Services(t *testing.T) {
t.Fatalf("Err: %v", err)
}
val := obj.(map[string]*structs.NodeService)
if len(val) != 2 {
if len(val) != 1 {
t.Fatalf("bad services: %v", obj)
}
if val["mysql"].Port != 5000 {
@@ -70,6 +70,14 @@ func TestAgent_Services_ACLFilter(t *testing.T) {
a := NewTestAgent(t.Name(), TestACLConfig())
defer a.Shutdown()
srv1 := &structs.NodeService{
ID: "mysql",
Service: "mysql",
Tags: []string{"master"},
Port: 5000,
}
a.state.AddService(srv1, "")
t.Run("no token", func(t *testing.T) {
req, _ := http.NewRequest("GET", "/v1/agent/services", nil)
obj, err := a.srv.AgentServices(nil, req)

View File

@@ -492,11 +492,6 @@ func TestAgent_RemoveService(t *testing.T) {
t.Fatalf("err: %v", err)
}
// Remove the consul service
if err := a.RemoveService("consul", false); err == nil {
t.Fatalf("should have errored")
}
// Remove without an ID
if err := a.RemoveService("", false); err == nil {
t.Fatalf("should have errored")
@@ -882,34 +877,6 @@ func TestAgent_updateTTLCheck(t *testing.T) {
}
}
func TestAgent_ConsulService(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), nil)
defer a.Shutdown()
// Consul service is registered
services := a.state.Services()
if _, ok := services[consul.ConsulServiceID]; !ok {
t.Fatalf("%s service should be registered", consul.ConsulServiceID)
}
// todo(fs): data race
func() {
a.state.Lock()
defer a.state.Unlock()
// Perform anti-entropy on consul service
if err := a.state.syncService(consul.ConsulServiceID); err != nil {
t.Fatalf("err: %s", err)
}
}()
// Consul service should be in sync
if !a.state.serviceStatus[consul.ConsulServiceID].inSync {
t.Fatalf("%s service should be in sync", consul.ConsulServiceID)
}
}
func TestAgent_PersistService(t *testing.T) {
t.Parallel()
cfg := TestConfig()
@@ -1432,19 +1399,8 @@ func TestAgent_unloadServices(t *testing.T) {
if err := a.unloadServices(); err != nil {
t.Fatalf("err: %s", err)
}
// Make sure it was unloaded and the consul service remains
found = false
for id := range a.state.Services() {
if id == svc.ID {
t.Fatalf("should have unloaded services")
}
if id == consul.ConsulServiceID {
found = true
}
}
if !found {
t.Fatalf("consul service should not be removed")
if len(a.state.Services()) != 0 {
t.Fatalf("should have unloaded services")
}
}

View File

@@ -341,7 +341,7 @@ func (f *aclFilter) allowService(service string) bool {
return true
}
if !f.enforceVersion8 && service == ConsulServiceID {
if !f.enforceVersion8 && service == structs.ConsulServiceID {
return true
}

View File

@@ -64,7 +64,7 @@ func (c *Catalog) Register(args *structs.RegisterRequest, reply *struct{}) error
// is going away after version 0.8). We check this same policy
// later if version 0.8 is enabled, so we can eventually just
// delete this and do all the ACL checks down there.
if args.Service.Service != ConsulServiceName {
if args.Service.Service != structs.ConsulServiceName {
if acl != nil && !acl.ServiceWrite(args.Service.Service) {
return errPermissionDenied
}

View File

@@ -54,7 +54,7 @@ func TestHealth_ChecksInState(t *testing.T) {
if checks[0].Name != "memory utilization" {
t.Fatalf("Bad: %v", checks[0])
}
if checks[1].CheckID != SerfCheckID {
if checks[1].CheckID != structs.SerfCheckID {
t.Fatalf("Bad: %v", checks[1])
}
}

View File

@@ -18,14 +18,8 @@ import (
)
const (
SerfCheckID types.CheckID = "serfHealth"
SerfCheckName = "Serf Health Status"
SerfCheckAliveOutput = "Agent alive and reachable"
SerfCheckFailedOutput = "Agent not live or unreachable"
ConsulServiceID = "consul"
ConsulServiceName = "consul"
newLeaderEvent = "consul:new-leader"
barrierWriteTimeout = 2 * time.Minute
newLeaderEvent = "consul:new-leader"
barrierWriteTimeout = 2 * time.Minute
)
// monitorLeadership is used to monitor if we acquire or lose our role
@@ -334,7 +328,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error {
}
for _, check := range checks {
// Ignore any non serf checks
if check.CheckID != SerfCheckID {
if check.CheckID != structs.SerfCheckID {
continue
}
@@ -359,7 +353,7 @@ func (s *Server) reconcileReaped(known map[string]struct{}) error {
}
serverPort := 0
for _, service := range services.Services {
if service.ID == ConsulServiceID {
if service.ID == structs.ConsulServiceID {
serverPort = service.Port
break
}
@@ -430,8 +424,8 @@ func (s *Server) handleAliveMember(member serf.Member) error {
var service *structs.NodeService
if valid, parts := agent.IsConsulServer(member); valid {
service = &structs.NodeService{
ID: ConsulServiceID,
Service: ConsulServiceName,
ID: structs.ConsulServiceID,
Service: structs.ConsulServiceName,
Port: parts.Port,
}
@@ -473,7 +467,7 @@ func (s *Server) handleAliveMember(member serf.Member) error {
return err
}
for _, check := range checks {
if check.CheckID == SerfCheckID && check.Status == api.HealthPassing {
if check.CheckID == structs.SerfCheckID && check.Status == api.HealthPassing {
return nil
}
}
@@ -490,10 +484,10 @@ AFTER_CHECK:
Service: service,
Check: &structs.HealthCheck{
Node: member.Name,
CheckID: SerfCheckID,
Name: SerfCheckName,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthPassing,
Output: SerfCheckAliveOutput,
Output: structs.SerfCheckAliveOutput,
},
// If there's existing information about the node, do not
@@ -520,7 +514,7 @@ func (s *Server) handleFailedMember(member serf.Member) error {
return err
}
for _, check := range checks {
if check.CheckID == SerfCheckID && check.Status == api.HealthCritical {
if check.CheckID == structs.SerfCheckID && check.Status == api.HealthCritical {
return nil
}
}
@@ -535,10 +529,10 @@ func (s *Server) handleFailedMember(member serf.Member) error {
Address: member.Addr.String(),
Check: &structs.HealthCheck{
Node: member.Name,
CheckID: SerfCheckID,
Name: SerfCheckName,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthCritical,
Output: SerfCheckFailedOutput,
Output: structs.SerfCheckFailedOutput,
},
// If there's existing information about the node, do not

View File

@@ -53,10 +53,10 @@ func TestLeader_RegisterMember(t *testing.T) {
if len(checks) != 1 {
t.Fatalf("client missing check")
}
if checks[0].CheckID != SerfCheckID {
if checks[0].CheckID != structs.SerfCheckID {
t.Fatalf("bad check: %v", checks[0])
}
if checks[0].Name != SerfCheckName {
if checks[0].Name != structs.SerfCheckName {
t.Fatalf("bad check: %v", checks[0])
}
if checks[0].Status != api.HealthPassing {
@@ -125,10 +125,10 @@ func TestLeader_FailedMember(t *testing.T) {
if len(checks) != 1 {
t.Fatalf("client missing check")
}
if checks[0].CheckID != SerfCheckID {
if checks[0].CheckID != structs.SerfCheckID {
t.Fatalf("bad check: %v", checks[0])
}
if checks[0].Name != SerfCheckName {
if checks[0].Name != structs.SerfCheckName {
t.Fatalf("bad check: %v", checks[0])
}
@@ -270,8 +270,8 @@ func TestLeader_Reconcile_ReapMember(t *testing.T) {
Address: "127.1.1.1",
Check: &structs.HealthCheck{
Node: "no-longer-around",
CheckID: SerfCheckID,
Name: SerfCheckName,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthCritical,
},
WriteRequest: structs.WriteRequest{
@@ -378,8 +378,8 @@ func TestLeader_Reconcile_Races(t *testing.T) {
NodeMeta: map[string]string{"hello": "world"},
Check: &structs.HealthCheck{
Node: c1.config.NodeName,
CheckID: SerfCheckID,
Name: SerfCheckName,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthCritical,
Output: "",
},

View File

@@ -0,0 +1,21 @@
package structs
import (
"github.com/hashicorp/consul/types"
)
// These are used to manage the built-in "serfHealth" check that's attached
// to every node in the catalog.
const (
SerfCheckID types.CheckID = "serfHealth"
SerfCheckName = "Serf Health Status"
SerfCheckAliveOutput = "Agent alive and reachable"
SerfCheckFailedOutput = "Agent not live or unreachable"
)
// These are used to manage the "consul" service that's attached to every Consul
// server node in the catalog.
const (
ConsulServiceID = "consul"
ConsulServiceName = "consul"
)

View File

@@ -9,7 +9,6 @@ import (
"sync/atomic"
"time"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/consul/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
@@ -483,6 +482,11 @@ func (l *localState) setSyncState() error {
// If we don't have the service locally, deregister it
existing, ok := l.services[id]
if !ok {
// The consul service is created automatically, and does
// not need to be deregistered.
if id == structs.ConsulServiceID {
continue
}
l.serviceStatus[id] = syncStatus{inSync: false}
continue
}
@@ -517,8 +521,8 @@ func (l *localState) setSyncState() error {
existing, ok := l.checks[id]
if !ok {
// The Serf check is created automatically, and does not
// need to be registered
if id == consul.SerfCheckID {
// need to be deregistered.
if id == structs.SerfCheckID {
continue
}
l.checkStatus[id] = syncStatus{inSync: false}

View File

@@ -161,7 +161,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) {
if !reflect.DeepEqual(serv, srv6) {
r.Fatalf("bad: %v %v", serv, srv6)
}
case "consul":
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
@@ -173,10 +173,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) {
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 6 {
if len(a.state.services) != 5 {
r.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 6 {
if len(a.state.serviceStatus) != 5 {
r.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
@@ -222,7 +222,7 @@ func TestAgentAntiEntropy_Services(t *testing.T) {
if !reflect.DeepEqual(serv, srv6) {
r.Fatalf("bad: %v %v", serv, srv6)
}
case "consul":
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
@@ -234,10 +234,10 @@ func TestAgentAntiEntropy_Services(t *testing.T) {
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 5 {
if len(a.state.services) != 4 {
r.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 5 {
if len(a.state.serviceStatus) != 4 {
r.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
@@ -333,7 +333,7 @@ func TestAgentAntiEntropy_EnableTagOverride(t *testing.T) {
!reflect.DeepEqual(serv.Tags, []string{"tag2"}) {
r.Fatalf("bad: %v %v", serv, srv2)
}
case "consul":
case structs.ConsulServiceID:
// ignore
default:
r.Fatalf("unexpected service: %v", id)
@@ -575,7 +575,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
if !reflect.DeepEqual(serv, srv2) {
t.Fatalf("bad: %#v %#v", serv, srv2)
}
case "consul":
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
@@ -588,10 +588,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 3 {
if len(a.state.services) != 2 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 3 {
if len(a.state.serviceStatus) != 2 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
@@ -634,7 +634,7 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
t.Fatalf("should not be permitted")
case "api":
t.Fatalf("should be deleted")
case "consul":
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
@@ -647,10 +647,10 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 2 {
if len(a.state.services) != 1 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 2 {
if len(a.state.serviceStatus) != 1 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {
@@ -975,7 +975,7 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {
if !reflect.DeepEqual(serv, srv2) {
t.Fatalf("bad: %#v %#v", serv, srv2)
}
case "consul":
case structs.ConsulServiceID:
// ignore
default:
t.Fatalf("unexpected service: %v", id)
@@ -988,10 +988,10 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {
defer a.state.RUnlock()
// Check the local state
if len(a.state.services) != 3 {
if len(a.state.services) != 2 {
t.Fatalf("bad: %v", a.state.services)
}
if len(a.state.serviceStatus) != 3 {
if len(a.state.serviceStatus) != 2 {
t.Fatalf("bad: %v", a.state.serviceStatus)
}
for name, status := range a.state.serviceStatus {

View File

@@ -6,7 +6,6 @@ import (
"strings"
"time"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/consul/structs"
"github.com/hashicorp/consul/types"
)
@@ -39,7 +38,7 @@ func (s *HTTPServer) SessionCreate(resp http.ResponseWriter, req *http.Request)
Op: structs.SessionCreate,
Session: structs.Session{
Node: s.agent.config.NodeName,
Checks: []types.CheckID{consul.SerfCheckID},
Checks: []types.CheckID{structs.SerfCheckID},
LockDelay: 15 * time.Second,
Behavior: structs.SessionKeysRelease,
TTL: "",

View File

@@ -8,7 +8,6 @@ import (
"testing"
"time"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/consul/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/types"
@@ -43,7 +42,7 @@ func TestSessionCreate(t *testing.T) {
raw := map[string]interface{}{
"Name": "my-cool-session",
"Node": a.Config.NodeName,
"Checks": []types.CheckID{consul.SerfCheckID, "consul"},
"Checks": []types.CheckID{structs.SerfCheckID, "consul"},
"LockDelay": "20s",
}
enc.Encode(raw)
@@ -89,7 +88,7 @@ func TestSessionCreateDelete(t *testing.T) {
raw := map[string]interface{}{
"Name": "my-cool-session",
"Node": a.Config.NodeName,
"Checks": []types.CheckID{consul.SerfCheckID, "consul"},
"Checks": []types.CheckID{structs.SerfCheckID, "consul"},
"LockDelay": "20s",
"Behavior": structs.SessionKeysDelete,
}

View File

@@ -46,8 +46,8 @@ Tokens are bound to a set of rules that control which Consul resources the token
has access to. Policies can be defined in either a whitelist or blacklist mode
depending on the configuration of
[`acl_default_policy`](/docs/agent/options.html#acl_default_policy). If the default
policy is to "deny all" actions, then token rules can be set to whitelist specific
actions. In the inverse, the "allow all" default behavior is a blacklist where rules
policy is to "deny" all actions, then token rules can be set to whitelist specific
actions. In the inverse, the "allow" all default behavior is a blacklist where rules
are used to prohibit actions. By default, Consul will allow all actions.
The following table summarizes the ACL policies that are available for constructing
@@ -100,10 +100,17 @@ the cache TTL is an upper bound on the staleness of policy that is enforced. It
possible to set a zero TTL, but this has adverse performance impacts, as every
request requires refreshing the policy via an RPC call.
#### Enabling ACLs
During an outage of the ACL datacenter, or loss of connectivity, the cache will be
used as long as the TTL is valid, or the cache may be extended if the
[`acl_down_policy`](/docs/agent/options.html#acl_down_policy) is set accordingly.
This configuration also allows the ACL system to fail open or closed.
[ACL replication](#replication) is also available to allow for the full set of ACL
tokens to be replicated for use during an outage.
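As a minimal sketch of these options (the values shown are illustrative), an agent in the ACL datacenter might combine a cache TTL with a down policy that extends the cache during an outage:
```json
{
  "acl_datacenter": "dc1",
  "acl_ttl": "30s",
  "acl_down_policy": "extend-cache"
}
```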
Enabling ACLs is done by setting up the following configuration options. These are
marked as to whether they are set on servers, clients, or both.
#### Configuring ACLs
ACLs are configured using several different configuration options. These are marked
as to whether they are set on servers, clients, or both.
| Configuration Option | Servers | Clients | Purpose |
| -------------------- | ------- | ------- | ------- |
@@ -122,13 +129,43 @@ system, or accessing Consul in special situations:
| Special Token | Servers | Clients | Purpose |
| ------------- | ------- | ------- | ------- |
| [`acl_agent_master_token`](/docs/agent/options.html#acl_agent_master_token) | `OPTIONAL` | `OPTIONAL` | Special token that can be used to access [Agent API](/api/agent.html) when the ACL datacenter isn't available, or servers are offline (for clients); used for setting up the cluster such as doing initial join operations |
| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self-update its registration information |
| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below. |
| [`acl_agent_token`](/docs/agent/options.html#acl_agent_token) | `OPTIONAL` | `OPTIONAL` | Special token that is used for an agent's internal operations with the [Catalog API](/api/catalog.html); this needs to have at least `node` policy access so the agent can self-update its registration information, and also needs `service` read access for all services that will be registered with that node for [anti-entropy](/docs/internals/anti-entropy.html) syncing |
| [`acl_master_token`](/docs/agent/options.html#acl_master_token) | `REQUIRED` | `N/A` | Special token used to bootstrap the ACL system, see details below |
| [`acl_token`](/docs/agent/options.html#acl_token) | `OPTIONAL` | `OPTIONAL` | Default token to use for client requests where no token is supplied; this is often configured with read-only access to services to enable DNS service discovery on agents |
Bootstrapping the ACL system is done by providing an initial
[`acl_master_token`](/docs/agent/options.html#acl_master_token) which will be created
as a "management" type token if it does not exist. The
#### Bootstrapping ACLs
Bootstrapping ACLs on a new cluster requires a few steps, outlined in the example in this
section.
**Enable ACLs on the Consul Servers**
The first step for bootstrapping ACLs is to enable ACLs on the Consul servers in the ACL
datacenter. In this example, we are configuring the following:
1. An ACL datacenter of "dc1", which is where these servers are
2. An ACL master token of "b1gs33cr3t"
3. A default policy of "deny" which means we are in whitelist mode
4. A down policy of "extend-cache" which means that we will ignore token TTLs during an
outage
Here's the corresponding JSON configuration file:
```json
{
"acl_datacenter": "dc1",
"acl_master_token": "b1gs33cr3t",
"acl_default_policy": "deny",
"acl_down_policy": "extend-cache"
}
```
The servers will need to be restarted to load the new configuration. Please take care
to start the servers one at a time, and ensure each server has joined and is operating
correctly before starting another.
The [`acl_master_token`](/docs/agent/options.html#acl_master_token) will be created
as a "management" type token automatically. The
[`acl_master_token`](/docs/agent/options.html#acl_master_token) is only installed when
a server acquires cluster leadership. If you would like to install or change the
[`acl_master_token`](/docs/agent/options.html#acl_master_token), set the new value for
@@ -138,6 +175,227 @@ for all servers. Once this is done, restart the current leader to force a leader
Once the ACL system is bootstrapped, ACL tokens can be managed through the
[ACL API](/api/acl.html).
**Create an Agent Token**
After the servers are restarted above, you will see new errors in the Consul server
logs about requests being blocked by ACLs:
```
2017/07/08 23:38:24 [WARN] agent: Node info update blocked by ACLs
2017/07/08 23:38:44 [WARN] agent: Coordinate update blocked by ACLs
```
These errors are because the agent doesn't yet have a properly configured
[`acl_agent_token`](/docs/agent/options.html#acl_agent_token) that it can use for its
own internal operations like updating its node information in the catalog, and performing
[anti-entropy](/docs/internals/anti-entropy.html) syncing. We can create a token using the
ACL API, and the ACL master token we set in the previous step:
```
$ curl \
--request PUT \
--header "X-Consul-Token: b1gs33cr3t" \
--data \
'{
"Name": "Agent Token",
"Type": "client",
"Rules": "node \"\" { policy = \"write\" } service \"\" { policy = \"read\" }"
}' http://127.0.0.1:8500/v1/acl/create
{"ID":"fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1"}
```
The returned value is the newly-created token. We can now add this to our Consul server
configuration and restart the servers once more to apply it:
```json
{
"acl_datacenter": "dc1",
"acl_master_token": "b1gs33cr3t",
"acl_default_policy": "deny",
"acl_down_policy": "extend-cache",
"acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1"
}
```
With that ACL agent token set, the servers will be able to sync themselves with the
catalog:
```
2017/07/08 23:42:59 [INFO] agent: Synced node info
```
**Enable ACLs on the Consul Clients**
Since ACL enforcement also occurs on the Consul clients, we need to also restart them
with a configuration file that enables ACLs:
```json
{
"acl_datacenter": "dc1",
"acl_down_policy": "extend-cache",
"acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1"
}
```
We used the same ACL agent token that we created for the servers, which will work since
it was not specific to any node or set of service prefixes. In a more locked-down
environment it is recommended that each client get an ACL agent token with `node` write
privileges for just its own node name prefix, and `service` read privileges for just the
service prefixes expected to be registered on that client.
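For example, a sketch of such a locked-down token for a client whose node name is "node-2" (the node name and prefixes here are illustrative) could be created with a payload like this to the same `/v1/acl/create` endpoint:
```json
{
  "Name": "node-2 Agent Token",
  "Type": "client",
  "Rules": "node \"node-2\" { policy = \"write\" } service \"\" { policy = \"read\" }"
}
```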
[Anti-entropy](/docs/internals/anti-entropy.html) syncing requires the ACL agent token
to have `service` read privileges for all services that may be registered with the agent,
so generally an empty `service` prefix can be used, as shown in the example.
Clients will report similar permission denied errors until they are restarted with an ACL
agent token.
**Set an Anonymous Policy (Optional)**
At this point ACLs are bootstrapped with ACL agent tokens configured, but there are no
other policies set up. Even basic operations like `consul members` will be restricted
by the ACL default policy of "deny":
```
$ consul members
```
We don't get an error since the ACL has filtered what we see, and we aren't allowed to
see any nodes by default.
If we supply the token we created above we will be able to see a listing of nodes because
it has write privileges to an empty `node` prefix, meaning it has access to all nodes:
```
$ CONSUL_HTTP_TOKEN=fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1 consul members
Node Address Status Type Build Protocol DC
node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1
node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1
```
It's pretty common in many environments to allow listing of all nodes, even without a
token. The policies associated with the special anonymous token can be updated to
configure Consul's behavior when no token is supplied. The anonymous token is managed
like any other ACL token, except that `anonymous` is used for the ID. In this example
we will give the anonymous token read privileges for all nodes:
```
$ curl \
--request PUT \
--header "X-Consul-Token: b1gs33cr3t" \
--data \
'{
"ID": "anonymous",
"Type": "client",
"Rules": "node \"\" { policy = \"read\" }"
}' http://127.0.0.1:8500/v1/acl/update
{"ID":"anonymous"}
```
The anonymous token is implicitly used if no token is supplied, so now we can run
`consul members` without supplying a token and we will be able to see the nodes:
```
$ consul members
Node Address Status Type Build Protocol DC
node-1 127.0.0.1:8301 alive server 0.9.0dev 2 dc1
node-2 127.0.0.2:8301 alive client 0.9.0dev 2 dc1
```
The anonymous token is also used for DNS lookups since there's no way to pass a
token as part of a DNS request. Here's an example lookup for the "consul" service:
```
$ dig @127.0.0.1 -p 8600 consul.service.consul
; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul
; (1 server found)
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NXDOMAIN, id: 9648
;; flags: qr aa rd; QUERY: 1, ANSWER: 0, AUTHORITY: 1, ADDITIONAL: 0
;; WARNING: recursion requested but not available
;; QUESTION SECTION:
;consul.service.consul. IN A
;; AUTHORITY SECTION:
consul. 0 IN SOA ns.consul. postmaster.consul. 1499584110 3600 600 86400 0
;; Query time: 2 msec
;; SERVER: 127.0.0.1#8600(127.0.0.1)
;; WHEN: Sun Jul 9 00:08:30 2017
;; MSG SIZE rcvd: 89
```
Now we get an `NXDOMAIN` error because the anonymous token doesn't have access to the
"consul" service. Let's add that to the anonymous token's policy:
```
$ curl \
--request PUT \
--header "X-Consul-Token: b1gs33cr3t" \
--data \
'{
"ID": "anonymous",
"Type": "client",
"Rules": "node \"\" { policy = \"read\" } service \"consul\" { policy = \"read\" }"
}' http://127.0.0.1:8500/v1/acl/update
{"ID":"anonymous"}
```
With that new policy in place, the DNS lookup will succeed:
```
$ dig @127.0.0.1 -p 8600 consul.service.consul
; <<>> DiG 9.8.3-P1 <<>> @127.0.0.1 -p 8600 consul.service.consul
; (1 server found)
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 46006
;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 0
;; WARNING: recursion requested but not available
;; QUESTION SECTION:
;consul.service.consul. IN A
;; ANSWER SECTION:
consul.service.consul. 0 IN A 127.0.0.1
;; Query time: 0 msec
;; SERVER: 127.0.0.1#8600(127.0.0.1)
;; WHEN: Sun Jul 9 00:11:14 2017
;; MSG SIZE rcvd: 55
```
The next section shows an alternative to the anonymous token.
**Set Agent-specific Default Tokens (Optional)**
An alternative to the anonymous token is the [`acl_token`](/docs/agent/options.html#acl_token)
configuration item. When a request is made to a particular Consul agent and no token is
supplied, the [`acl_token`](/docs/agent/options.html#acl_token) will be used for the token,
instead of being left empty which would normally invoke the anonymous token.
This behaves very similarly to the anonymous token, but can be configured differently on each
agent, if desired. For example, this allows more fine-grained control of what DNS requests a
given agent can service, or can give the agent read access to some key-value store prefixes by
default.
If using [`acl_token`](/docs/agent/options.html#acl_token), then it's likely the anonymous
token will have a more restrictive policy than shown in the examples here.
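As a sketch, a client configuration using this approach might add a default token alongside its ACL agent token (the `acl_token` ID below is hypothetical and would come from an `/v1/acl/create` call like the earlier examples):
```json
{
  "acl_datacenter": "dc1",
  "acl_down_policy": "extend-cache",
  "acl_agent_token": "fe3b8d40-0ee0-8783-6cc2-ab1aa9bb16c1",
  "acl_token": "99edb43c-d59b-4e11-9306-b8ac37e349b7"
}
```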
**Next Steps**
The examples above configure a basic ACL environment with the ability to see all nodes
by default, and limited access to just the "consul" service. The [ACL API](/api/acl.html)
can be used to create tokens for applications specific to their intended use, and to create
more specific ACL agent tokens for each agent's expected role.
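For instance, a token for a hypothetical "web" application might scope `service` and `key` rules to just that application (the names and prefixes are illustrative):
```json
{
  "Name": "Web Application Token",
  "Type": "client",
  "Rules": "service \"web\" { policy = \"write\" } key \"web/\" { policy = \"write\" }"
}
```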
## Rule Specification
A core part of the ACL system is the rule language which is used to describe the policy