e2e: e2e test for connect with consul acls

Provide a script for managing Consul ACLs on a TF-provisioned cluster for
e2e testing. The script can be used to 'enable' or 'disable' Consul ACLs,
and automatically takes care of the bootstrapping process if necessary.

The bootstrapping process takes a long time, so we may need to
extend the overall e2e timeout (20 minutes seems fine).

Introduces basic tests for Consul Connect with ACLs.
Seth Hoenig 2020-01-13 10:13:07 -06:00
parent 4152254c3a
commit fc498c2b96
15 changed files with 1111 additions and 6 deletions

e2e/connect/acls.go Normal file

@ -0,0 +1,410 @@
package connect
import (
"os"
"strings"
"testing"
"time"
capi "github.com/hashicorp/consul/api"
consulapi "github.com/hashicorp/consul/api"
napi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/consulacls"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/jobspec"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
)
const (
// envConsulToken is the consul http token environment variable
envConsulToken = "CONSUL_HTTP_TOKEN"
// demoConnectJob is the example connect enabled job useful for testing
demoConnectJob = "connect/input/demo.nomad"
)
type ConnectACLsE2ETest struct {
framework.TC
// manageConsulACLs is used to 'enable' and 'disable' Consul ACLs in the
// Consul Cluster that has been set up for e2e testing.
manageConsulACLs consulacls.Manager
// consulMasterToken is set to the generated Consul ACL token after using
// the consul-acls-manage.sh script to enable ACLs.
consulMasterToken string
// things to cleanup after each test case
jobIDs []string
consulPolicyIDs []string
consulTokenIDs []string
}
func (tc *ConnectACLsE2ETest) BeforeAll(f *framework.F) {
// Wait for Nomad to be ready before doing anything.
e2eutil.WaitForLeader(f.T(), tc.Nomad())
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 2)
// Now enable Consul ACLs, the bootstrapping process for which will be
// managed automatically if needed.
var err error
tc.manageConsulACLs, err = consulacls.New(consulacls.DefaultTFStateFile)
require.NoError(f.T(), err)
tc.enableConsulACLs(f)
// Sanity check the consul master token exists, otherwise tests are just
// going to be a train wreck.
tokenLength := len(tc.consulMasterToken)
require.Equal(f.T(), 36, tokenLength, "consul master token wrong length")
// Sanity check that CONSUL_HTTP_TOKEN is NOT set, because having it set
// causes the agent check queries to fail (they do not allow a token to be set (!)).
consulTokenEnv := os.Getenv(envConsulToken)
require.Empty(f.T(), consulTokenEnv)
// Wait for Nomad to be ready _again_, since everything was restarted during
// the bootstrap process.
e2eutil.WaitForLeader(f.T(), tc.Nomad())
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 2)
}
// enableConsulACLs effectively executes `consul-acls-manage.sh enable`, which
// will activate Consul ACLs, going through the bootstrap process if necessary.
func (tc *ConnectACLsE2ETest) enableConsulACLs(f *framework.F) {
tc.consulMasterToken = tc.manageConsulACLs.Enable(f.T())
}
// AfterAll runs after all tests are complete.
//
// We disable ConsulACLs in here to isolate the use of Consul ACLs only to
// test suites that explicitly want to test with them enabled.
func (tc *ConnectACLsE2ETest) AfterAll(f *framework.F) {
tc.disableConsulACLs(f)
}
// disableConsulACLs effectively executes `consul-acls-manage.sh disable`, which
// will de-activate Consul ACLs.
func (tc *ConnectACLsE2ETest) disableConsulACLs(f *framework.F) {
tc.manageConsulACLs.Disable(f.T())
}
// AfterEach does cleanup of Consul ACL objects that were created during each
// test case. Each test case may assume it is starting from a "fresh" state -
// as if the consul ACL bootstrap process had just taken place.
func (tc *ConnectACLsE2ETest) AfterEach(f *framework.F) {
if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
return
}
t := f.T()
r := require.New(t)
// cleanup jobs
for _, id := range tc.jobIDs {
t.Log("cleanup: deregister nomad job id:", id)
_, _, err := tc.Nomad().Jobs().Deregister(id, true, nil)
r.NoError(err)
}
// cleanup consul tokens
for _, id := range tc.consulTokenIDs {
t.Log("cleanup: delete consul token id:", id)
_, err := tc.Consul().ACL().TokenDelete(id, &capi.WriteOptions{Token: tc.consulMasterToken})
r.NoError(err)
}
// cleanup consul policies
for _, id := range tc.consulPolicyIDs {
t.Log("cleanup: delete consul policy id:", id)
_, err := tc.Consul().ACL().PolicyDelete(id, &capi.WriteOptions{Token: tc.consulMasterToken})
r.NoError(err)
}
// do garbage collection
err := tc.Nomad().System().GarbageCollect()
r.NoError(err)
tc.jobIDs = []string{}
tc.consulTokenIDs = []string{}
tc.consulPolicyIDs = []string{}
}
type consulPolicy struct {
Name string // e.g. nomad-operator
Rules string // e.g. service "" { policy="write" }
}
func (tc *ConnectACLsE2ETest) createConsulPolicy(p consulPolicy, f *framework.F) string {
r := require.New(f.T())
result, _, err := tc.Consul().ACL().PolicyCreate(&capi.ACLPolicy{
Name: p.Name,
Description: "test policy " + p.Name,
Rules: p.Rules,
}, &capi.WriteOptions{Token: tc.consulMasterToken})
r.NoError(err, "failed to create consul policy")
tc.consulPolicyIDs = append(tc.consulPolicyIDs, result.ID)
return result.ID
}
func (tc *ConnectACLsE2ETest) createOperatorToken(policyID string, f *framework.F) string {
r := require.New(f.T())
token, _, err := tc.Consul().ACL().TokenCreate(&capi.ACLToken{
Description: "operator token",
Policies: []*capi.ACLTokenPolicyLink{{ID: policyID}},
}, &capi.WriteOptions{Token: tc.consulMasterToken})
r.NoError(err, "failed to create operator token")
tc.consulTokenIDs = append(tc.consulTokenIDs, token.AccessorID)
return token.SecretID
}
// TODO: This test is broken and requires an actual fix.
// We currently do not check if the provided operator token is a master token,
// and we need to do that to be consistent with the semantics of the Consul ACL
// system. Fix will be covered in a separate issue.
//
//func (tc *ConnectACLsE2ETest) TestConnectACLsRegister_MasterToken(f *framework.F) {
// t := f.T()
// r := require.New(t)
//
// t.Log("test register Connect job w/ ACLs enabled w/ master token")
//
// jobID := "connect" + uuid.Generate()[0:8]
// tc.jobIDs = append(tc.jobIDs, jobID)
//
// jobAPI := tc.Nomad().Jobs()
//
// job, err := jobspec.ParseFile(demoConnectJob)
// r.NoError(err)
//
// // Set the job file to use the consul master token.
// // One should never do this in practice, but, it should work.
// // https://www.consul.io/docs/acl/acl-system.html#builtin-tokens
// //
// // note: We cannot just set the environment variable when using the API
// // directly - that only works when using the nomad CLI command which does
// // the step of converting the environment variable into a set option.
// job.ConsulToken = &tc.consulMasterToken
//
// resp, _, err := jobAPI.Register(job, nil)
// r.NoError(err)
// r.NotNil(resp)
// r.Zero(resp.Warnings)
//}
//
func (tc *ConnectACLsE2ETest) TestConnectACLsRegister_MissingOperatorToken(f *framework.F) {
t := f.T()
r := require.New(t)
t.Log("test register Connect job w/ ACLs enabled w/o operator token")
job, err := jobspec.ParseFile(demoConnectJob)
r.NoError(err)
jobAPI := tc.Nomad().Jobs()
// Explicitly show the ConsulToken is not set
job.ConsulToken = nil
_, _, err = jobAPI.Register(job, nil)
r.Error(err)
t.Log("job correctly rejected, with error:", err)
}
func (tc *ConnectACLsE2ETest) TestConnectACLsRegister_FakeOperatorToken(f *framework.F) {
t := f.T()
r := require.New(t)
t.Log("test register Connect job w/ ACLs enabled w/ operator token")
policyID := tc.createConsulPolicy(consulPolicy{
Name: "nomad-operator-policy",
Rules: `service "count-api" { policy = "write" } service "count-dashboard" { policy = "write" }`,
}, f)
t.Log("created operator policy:", policyID)
// generate a fake consul token
fakeToken := uuid.Generate()
job := tc.parseJobSpecFile(t, demoConnectJob)
jobAPI := tc.Nomad().Jobs()
// deliberately set the fake Consul token
job.ConsulToken = &fakeToken
// should fail, because the token is fake
_, _, err := jobAPI.Register(job, nil)
r.Error(err)
t.Log("job correctly rejected, with error:", err)
}
func (tc *ConnectACLsE2ETest) TestConnectACLs_ConnectDemo(f *framework.F) {
t := f.T()
r := require.New(t)
t.Log("test register Connect job w/ ACLs enabled w/ operator token")
// === Setup ACL policy and token ===
// create a policy allowing writes of services "count-api" and "count-dashboard"
policyID := tc.createConsulPolicy(consulPolicy{
Name: "nomad-operator-policy",
Rules: `service "count-api" { policy = "write" } service "count-dashboard" { policy = "write" }`,
}, f)
t.Log("created operator policy:", policyID)
// create a Consul "operator token" blessed with the above policy
operatorToken := tc.createOperatorToken(policyID, f)
t.Log("created operator token:", operatorToken)
// === Register the Nomad job ===
// parse the example connect jobspec file
jobID := "connect" + uuid.Generate()[0:8]
tc.jobIDs = append(tc.jobIDs, jobID)
job := tc.parseJobSpecFile(t, demoConnectJob)
job.ID = &jobID
jobAPI := tc.Nomad().Jobs()
// set the valid consul operator token
job.ConsulToken = &operatorToken
// registering the job should succeed
resp, _, err := jobAPI.Register(job, nil)
r.NoError(err)
r.NotNil(resp)
r.Empty(resp.Warnings)
t.Log("job has been registered with evalID:", resp.EvalID)
// === Make sure the evaluation actually succeeds ===
EVAL:
qOpts := &napi.QueryOptions{WaitIndex: resp.EvalCreateIndex}
evalAPI := tc.Nomad().Evaluations()
eval, qMeta, err := evalAPI.Info(resp.EvalID, qOpts)
r.NoError(err)
qOpts.WaitIndex = qMeta.LastIndex
switch eval.Status {
case "pending":
goto EVAL
case "complete":
// ok!
case "failed", "canceled", "blocked":
r.Failf("eval %s\n%s\n", eval.Status, pretty.Sprint(eval))
default:
r.Failf("unknown eval status: %s\n%s\n", eval.Status, pretty.Sprint(eval))
}
// assert there were no placement failures
r.Zero(eval.FailedTGAllocs, pretty.Sprint(eval.FailedTGAllocs))
r.Len(eval.QueuedAllocations, 2, pretty.Sprint(eval.QueuedAllocations))
// === Assert allocs are running ===
for i := 0; i < 20; i++ {
allocs, qMeta, err := evalAPI.Allocations(eval.ID, qOpts)
r.NoError(err)
r.Len(allocs, 2)
qOpts.WaitIndex = qMeta.LastIndex
running := 0
for _, alloc := range allocs {
switch alloc.ClientStatus {
case "running":
running++
case "pending":
// keep trying
default:
r.Failf("alloc failed", "alloc: %s", pretty.Sprint(alloc))
}
}
if running == len(allocs) {
break
}
time.Sleep(500 * time.Millisecond)
}
allocs, _, err := evalAPI.Allocations(eval.ID, qOpts)
r.NoError(err)
allocIDs := make(map[string]bool, 2)
for _, a := range allocs {
if a.ClientStatus != "running" || a.DesiredStatus != "run" {
r.Failf("terminal alloc", "alloc %s (%s) terminal; client=%s desired=%s", a.TaskGroup, a.ID, a.ClientStatus, a.DesiredStatus)
}
allocIDs[a.ID] = true
}
// === Check Consul service health ===
agentAPI := tc.Consul().Agent()
failing := map[string]*capi.AgentCheck{}
for i := 0; i < 60; i++ {
checks, err := agentAPI.Checks()
require.NoError(t, err)
// filter out checks for other services
for cid, check := range checks {
found := false
for allocID := range allocIDs {
if strings.Contains(check.ServiceID, allocID) {
found = true
break
}
}
if !found {
delete(checks, cid)
}
}
// ensure checks are all passing
failing = map[string]*consulapi.AgentCheck{}
for _, check := range checks {
if check.Status != "passing" {
failing[check.CheckID] = check
break
}
}
if len(failing) == 0 {
break
}
t.Logf("still %d checks not passing", len(failing))
time.Sleep(time.Second)
}
require.Len(t, failing, 0, pretty.Sprint(failing))
// === Check Consul SI tokens were generated for sidecars ===
aclAPI := tc.Consul().ACL()
entries, _, err := aclAPI.TokenList(&capi.QueryOptions{
Token: tc.consulMasterToken,
})
r.NoError(err)
foundSITokenForCountDash := false
foundSITokenForCountAPI := false
for _, entry := range entries {
if strings.Contains(entry.Description, "[connect-proxy-count-dashboard]") {
foundSITokenForCountDash = true
} else if strings.Contains(entry.Description, "[connect-proxy-count-api]") {
foundSITokenForCountAPI = true
}
}
r.True(foundSITokenForCountDash, "no SI token found for count-dash")
r.True(foundSITokenForCountAPI, "no SI token found for count-api")
t.Log("connect job with ACLs enable finished")
}
func (tc *ConnectACLsE2ETest) parseJobSpecFile(t *testing.T, filename string) *napi.Job {
job, err := jobspec.ParseFile(filename)
require.NoError(t, err)
return job
}

e2e/connect/connect.go

@ -21,6 +21,7 @@ type ConnectE2ETest struct {
}
func init() {
// connect tests without Consul ACLs enabled
framework.AddSuites(&framework.TestSuite{
Component: "Connect",
CanRunLocal: true,
@ -30,6 +31,16 @@ func init() {
new(ConnectClientStateE2ETest),
},
})
// connect tests with Consul ACLs enabled
framework.AddSuites(&framework.TestSuite{
Component: "ConnectACLs",
CanRunLocal: false,
Consul: true,
Cases: []framework.TestCase{
new(ConnectACLsE2ETest),
},
})
}
func (tc *ConnectE2ETest) BeforeAll(f *framework.F) {

e2e/consulacls/README.md Normal file

@ -0,0 +1,45 @@
# Configure Consul ACLs
This directory contains a set of scripts for re-configuring Consul in the
TF-provisioned e2e environment to enable Consul ACLs.
## Usage
The `consul-acls-manage.sh` script can be used to manipulate the Consul cluster
to activate or de-activate Consul ACLs. The script has three subcommands, only
two of which should be used from e2e framework tests.
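All three subcommands must be run from the `e2e/` directory of the Nomad repository
(the script checks for this, and expects `terraform/terraform.tfstate` to already
exist). For example:

```sh
./consulacls/consul-acls-manage.sh enable   # activate ACLs, bootstrapping first if needed
./consulacls/consul-acls-manage.sh disable  # deactivate ACLs, keeping the generated token file
```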
### bootstrap
The command `consul-acls-manage.sh bootstrap` should *NOT* be used from e2e
framework tests. It's merely a convenience entry-point for doing development /
debugging on the script itself.
The bootstrap process will upload "reasonable" ACL policy files to Consul Servers,
Consul Clients, Nomad Servers, and Nomad Clients.
The bootstrap process creates a file on local disk which contains the generated
Consul ACL master token. The file is named based on the current TF state file
serial number: `/tmp/e2e-consul-bootstrap-<serial>.token`.
### enable
The command `consul-acls-manage.sh enable` will enable Consul ACLs, going through
the bootstrap process only if necessary. Whether the bootstrap process is necessary
depends on the existence of a token file that matches the current TF state serial
number. If no associated token file exists for the current TF state, the bootstrap
process is required. Otherwise, the bootstrap process is skipped.
If the bootstrap process was not required (i.e. it already occurred and a
Consul master token already exists for the current TF state), the script will
activate ACLs in the Consul Server configurations and restart those agents. After
using `enable`, the `disable` command can be used to turn Consul ACLs back off,
without destroying any of the existing ACL configuration.
### disable
The command `consul-acls-manage.sh disable` will disable Consul ACLs. This does
not "clean up" the policy files for Consul / Nomad agents; it merely deactivates
ACLs in the Consul Server configurations and restarts those agents. After using
`disable`, the `enable` command can be used to turn Consul ACLs back on, using
the same ACL token(s) generated before.

e2e/consulacls/acl-disable.hcl Normal file

@ -0,0 +1,6 @@
# This partial consul configuration file will disable Consul ACLs. The
# consul-acls-manage.sh script uploads this file as "acl.hcl" to Consul Server
# configuration directories, and restarts those agents.
acl = {
enabled = false
}

e2e/consulacls/acl-enable.hcl Normal file

@ -0,0 +1,8 @@
# This partial consul configuration file will enable Consul ACLs. The
# consul-acls-manage.sh script uploads this file as "acl.hcl" to Consul Server
# configuration directories, and restarts those agents.
acl = {
enabled = true
default_policy = "deny"
enable_token_persistence = true
}

e2e/consulacls/consul-acls-manage.sh Normal file

@ -0,0 +1,375 @@
#!/usr/bin/env bash
# must be run from e2e directory
set -o errexit
set -o nounset
set -o pipefail
tfstatefile="terraform/terraform.tfstate"
# Make sure we are running from the e2e/ directory
[ "$(basename "$(pwd)")" == "e2e" ] || (echo "must be run from nomad/e2e directory" && exit 1)
# Make sure one argument was provided (subcommand)
[ ${#} -eq 1 ] || (echo "expect one argument (subcommand)" && exit 1)
# Make sure terraform state file exists
[ -f "${tfstatefile}" ] || (echo "file ${tfstatefile} must exist (run terraform?)" && exit 1)
# Load Linux Client Node IPs from terraform state file
linux_clients=$(jq -r .outputs.linux_clients.value[] <"${tfstatefile}" | xargs)
# Load Windows Client Node IPs from terraform state file
windows_clients=$(jq -r .outputs.windows_clients.value[] <"${tfstatefile}" | xargs)
# Combine all the clients together
clients="${linux_clients} ${windows_clients}"
# Load Server Node IPs from terraform/terraform.tfstate
servers=$(jq -r .outputs.servers.value[] <"${tfstatefile}" | xargs)
# Use the 0th server as the ACL bootstrap server
server0=$(echo "${servers}" | cut -d' ' -f1)
# Find the .pem file to use
pemfile="terraform/$(jq -r '.resources[] | select(.name=="private_key_pem") | .instances[0].attributes.filename' <"terraform/terraform.tfstate")"
# Consul and Nomad configuration directories, matching the systemd service files
# on the provisioned hosts
consul_configs="/etc/consul.d"
nomad_configs="/etc/nomad.d"
# SSH user for the provisioned hosts (not derived from the TF state)
user=ubuntu
# Create a filename based on the TF state file (.serial), where we will store and/or
# lookup the consul master token. The presence of this file is what determines
# whether a full ACL bootstrap must occur, or if we only need to activate ACLs
# whenever the "enable" sub-command is chosen.
token_file="/tmp/e2e-consul-bootstrap-$(jq .serial <${tfstatefile}).token"
# One argument - the subcommand to run which may be: bootstrap, enable, or disable
subcommand="${1}"
echo "==== SETUP configuration ====="
echo "SETUP command is: ${subcommand}"
echo "SETUP token file: ${token_file}"
echo "SETUP servers: ${servers}"
echo "SETUP linux clients: ${linux_clients}"
echo "SETUP windows clients: ${windows_clients}"
echo "SETUP pem file: ${pemfile}"
echo "SETUP consul configs: ${consul_configs}"
echo "SETUP nomad configs: ${nomad_configs}"
echo "SETUP aws user: ${user}"
echo "SETUP bootstrap server: ${server0}"
function doSSH() {
hostname="$1"
command="$2"
echo "-----> will ssh command '${command}' on ${hostname}"
ssh \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
-i "${pemfile}" \
"${user}@${hostname}" "${command}"
}
function doSCP() {
original="$1"
username="$2"
hostname="$3"
destination="$4"
echo "------> will scp ${original} to ${hostname}"
scp \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
-i "${pemfile}" \
"${original}" "${username}@${hostname}:${destination}"
}
function doBootstrap() {
echo "=== Bootstrap: Consul Configs ==="
# Stop all Nomad agents.
stopNomad
# Stop all Consul agents.
stopConsul
# Run the activation step, which uploads the ACLs-enabled acl.hcl file
# to each Consul Server's configuration directory, then (re)starts each
# Consul Server agent.
doActivate
echo "=== Bootstrap: Consul ACL Bootstrap ==="
# Bootstrap Consul ACLs on server[0]
echo "-> bootstrap ACL using ${server0}"
consul_http_token=$(doSSH "${server0}" "/usr/local/bin/consul acl bootstrap" | grep SecretID | awk '{print $2}')
consul_http_addr="http://${server0}:8500"
export CONSUL_HTTP_TOKEN=${consul_http_token}
export CONSUL_HTTP_ADDR=${consul_http_addr}
echo " consul http: ${CONSUL_HTTP_ADDR}"
echo " consul root: ${CONSUL_HTTP_TOKEN}"
echo "${CONSUL_HTTP_TOKEN}" > "${token_file}"
# Create Consul Server Policy & Consul Server agent tokens
echo "-> configure consul server policy"
consul acl policy create -name server-policy -rules @consulacls/consul-server-policy.hcl
# Create & Set agent token for each Consul Server
for server in ${servers}; do
echo "---> will create agent token for server ${server}"
server_agent_token=$(consul acl token create -description "consul server agent token" -policy-name server-policy | grep SecretID | awk '{print $2}')
echo "---> setting token for server agent: ${server} -> ${server_agent_token}"
consul acl set-agent-token agent "${server_agent_token}"
echo "---> done setting agent token for server ${server}"
done
# Wait 10s before continuing with configuring consul clients.
echo "-> sleep 10s"
sleep 10
# Start the Consul Clients back up so we can set their tokens now
startConsulClients
# Create Consul Client Policy & Client agent tokens
echo "-> configure consul client policy"
consul acl policy create -name client-policy -rules @consulacls/consul-client-policy.hcl
# Create & Set agent token for each Consul Client (including windows)
for client in ${clients}; do
echo "---> will create consul agent token for client ${client}"
client_agent_token=$(consul acl token create -description "consul client agent token" -policy-name client-policy | grep SecretID | awk '{print $2}')
echo "---> setting consul token for consul client ${client} -> ${client_agent_token}"
consul acl set-agent-token agent "${client_agent_token}"
echo "---> done setting consul agent token for client ${client}"
done
echo "=== Bootstrap: Nomad Configs ==="
# Create Nomad Server consul Policy and Nomad Server consul tokens
echo "-> configure nomad server policy & consul token"
consul acl policy create -name nomad-server-policy -rules @consulacls/nomad-server-policy.hcl
nomad_server_consul_token=$(consul acl token create -description "nomad server consul token" -policy-name nomad-server-policy | grep SecretID | awk '{print $2}')
nomad_server_consul_token_tmp=$(mktemp)
cp consulacls/nomad-server-consul.hcl "${nomad_server_consul_token_tmp}"
sed -i "s/CONSUL_TOKEN/${nomad_server_consul_token}/g" "${nomad_server_consul_token_tmp}"
for server in ${servers}; do
echo "---> upload nomad-server-consul.hcl to ${server}"
doSCP "${nomad_server_consul_token_tmp}" "${user}" "${server}" "/tmp/nomad-server-consul.hcl"
doSSH "${server}" "sudo mv /tmp/nomad-server-consul.hcl ${nomad_configs}/nomad-server-consul.hcl"
done
# Create Nomad Client consul Policy and Nomad Client consul token
echo "-> configure nomad client policy & consul token"
consul acl policy create -name nomad-client-policy -rules @consulacls/nomad-client-policy.hcl
nomad_client_consul_token=$(consul acl token create -description "nomad client consul token" -policy-name nomad-client-policy | grep SecretID | awk '{print $2}')
nomad_client_consul_token_tmp=$(mktemp)
cp consulacls/nomad-client-consul.hcl "${nomad_client_consul_token_tmp}"
sed -i "s/CONSUL_TOKEN/${nomad_client_consul_token}/g" "${nomad_client_consul_token_tmp}"
for linux_client in ${linux_clients}; do
echo "---> upload nomad-client-token.hcl to ${linux_client}"
doSCP "${nomad_client_consul_token_tmp}" "${user}" "${linux_client}" "/tmp/nomad-client-consul.hcl"
doSSH "${linux_client}" "sudo mv /tmp/nomad-client-consul.hcl ${nomad_configs}/nomad-client-consul.hcl"
done
startNomad
export NOMAD_ADDR="http://${server0}:4646"
echo "=== Activate: DONE ==="
}
function doEnable() {
if [ ! -f "${token_file}" ]; then
echo "ENABLE: token file does not exist, doing a full ACL bootstrap"
doBootstrap
else
echo "ENABLE: token file already exists, will activate ACLs"
doActivate
fi
echo "=== Enable: DONE ==="
# show the status of all the agents
echo "---> token file is ${token_file}"
consul_http_token=$(cat "${token_file}")
export CONSUL_HTTP_TOKEN="${consul_http_token}"
echo "export CONSUL_HTTP_TOKEN=${CONSUL_HTTP_TOKEN}"
doStatus
}
function doDisable() {
if [ ! -f "${token_file}" ]; then
echo "DISABLE: token file does not exist, did bootstrap ever happen?"
exit 1
else
echo "DISABLE: token file exists, will deactivate ACLs"
doDeactivate
fi
echo "=== Disable: DONE ==="
# show the status of all the agents
unset CONSUL_HTTP_TOKEN
doStatus
}
function doActivate() {
echo "=== Activate ==="
stopConsul
# Upload acl-enable.hcl to each Consul Server agent's configuration directory.
for server in ${servers}; do
echo " activate: upload acl-enable.hcl to ${server}::acl.hcl"
doSCP "consulacls/acl-enable.hcl" "${user}" "${server}" "/tmp/acl.hcl"
doSSH "${server}" "sudo mv /tmp/acl.hcl ${consul_configs}/acl.hcl"
done
# Restart each Consul Server agent to pick up the new config.
for server in ${servers}; do
echo " activate: restart Consul Server on ${server} ..."
doSSH "${server}" "sudo systemctl start consul"
sleep 1
done
sleep 10
startConsulClients
sleep 10
echo "=== Activate: DONE ==="
}
function stopNomad {
echo "=== Stop Nomad agents ==="
# Stop every Nomad agent (clients and servers) in preparation for Consul ACL
# bootstrapping.
for server in ${servers}; do
echo " stop Nomad Server on ${server}"
doSSH "${server}" "sudo systemctl stop nomad"
sleep 1
done
for linux_client in ${linux_clients}; do
echo " stop Nomad Client on ${linux_client}"
doSSH "${linux_client}" "sudo systemctl stop nomad"
sleep 1
done
echo "... all nomad agents stopped"
}
function startNomad {
echo "=== Start Nomad agents ==="
# Start every Nomad agent (clients and servers) after having Consul ACL
# bootstrapped and configurations set for Nomad.
for server in ${servers}; do
echo " start Nomad Server on ${server}"
doSSH "${server}" "sudo systemctl start nomad"
sleep 1
done
# give the servers a chance to settle
sleep 10
for linux_client in ${linux_clients}; do
echo " start Nomad Client on ${linux_client}"
doSSH "${linux_client}" "sudo systemctl start nomad"
sleep 3
done
# give the clients a long time to settle
sleep 30
echo "... all nomad agents started"
}
function stopConsul {
echo "=== Stop Consul agents ==="
# Stop every Consul agent (clients and servers) in preparation for Consul ACL
# bootstrapping.
for server in ${servers}; do
echo " stop Consul Server on ${server}"
doSSH "${server}" "sudo systemctl stop consul"
sleep 1
done
for linux_client in ${linux_clients}; do
echo " stop Consul Client on ${linux_client}"
doSSH "${linux_client}" "sudo systemctl stop consul"
sleep 1
done
echo "... all consul agents stopped"
}
function startConsulClients {
echo "=== Start Consul Clients ==="
# Start Consul Clients
for linux_client in ${linux_clients}; do
echo " start Consul Client on ${linux_client}"
doSSH "${linux_client}" "sudo systemctl start consul"
sleep 2
done
sleep 5 # let them settle
echo "... all consul clients started"
}
function doDeactivate {
echo "=== Deactivate ==="
# Upload acl-disable.hcl to each Consul Server agent's configuration directory.
for server in ${servers}; do
echo " deactivate: upload acl-disable.hcl to ${server}::acl.hcl"
doSCP "consulacls/acl-disable.hcl" "${user}" "${server}" "/tmp/acl.hcl"
doSSH "${server}" "sudo mv /tmp/acl.hcl ${consul_configs}/acl.hcl"
done
# Restart each Consul server agent to pick up the new config.
for server in ${servers}; do
echo " deactivate: restart Consul Server on ${server} ..."
doSSH "${server}" "sudo systemctl restart consul"
sleep 3 # let the agent settle
done
# Wait 10s before moving on, Consul needs a second to calm down.
echo " deactivate: sleep 10s ..."
sleep 10
}
function doStatus {
# assumes CONSUL_HTTP_TOKEN is set (or not)
echo "consul members"
consul members
echo ""
echo "nomad server members"
nomad server members
echo ""
echo "nomad node status"
nomad node status
echo ""
}
# It's the entrypoint to our script!
case "${subcommand}" in
bootstrap)
# The bootstrap target exists to make some local development easier. Test
# cases running from the e2e framework should always use "enable" which aims
# to be idempotent.
doBootstrap
;;
enable)
doEnable
;;
disable)
doDisable
;;
*)
echo "incorrect subcommand ${subcommand}"
exit 1
;;
esac

e2e/consulacls/consul-client-policy.hcl Normal file

@ -0,0 +1,25 @@
acl = "write"
agent "" {
policy = "write"
}
event "" {
policy = "write"
}
key "" {
policy = "write"
}
node "" {
policy = "write"
}
query "" {
policy = "write"
}
service "" {
policy = "write"
}

e2e/consulacls/consul-server-policy.hcl Normal file

@ -0,0 +1,26 @@
acl = "write"
agent "" {
policy = "write"
}
event "" {
policy = "write"
}
key "" {
policy = "write"
}
node "" {
policy = "write"
}
query "" {
policy = "write"
}
service "" {
policy = "write"
}

e2e/consulacls/manage.go Normal file

@ -0,0 +1,125 @@
package consulacls
import (
"encoding/json"
"fmt"
"io/ioutil"
"strings"
"testing"
"github.com/hashicorp/nomad/e2e/framework/provisioning"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"
)
// DefaultTFStateFile is the location of the TF state file, as created for the
// e2e test framework. This file is used to extract the TF serial number, which
// is used to determine whether the consul bootstrap process is necessary or has
// already taken place.
const DefaultTFStateFile = "terraform/terraform.tfstate"
// A Manager is used to manipulate whether Consul ACLs are enabled or disabled.
// Only works with TF provisioned clusters.
type Manager interface {
// Enable Consul ACLs in the Consul cluster. The Consul ACL master token
// associated with the Consul cluster is returned.
//
// A complete bootstrap process will take place if necessary.
//
// Once enabled, Consul ACLs can be disabled with Disable.
Enable(t *testing.T) string
// Disable Consul ACLs in the Consul Cluster.
//
// Once disabled, Consul ACLs can be re-enabled with Enable.
Disable(t *testing.T)
}
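// A minimal usage sketch for a hypothetical caller (it mirrors how
// ConnectACLsE2ETest in e2e/connect/acls.go drives the manager): create a
// Manager from the TF state file, Enable ACLs (bootstrapping on first use),
// keep the returned master token for Consul API calls, and Disable ACLs again
// when the suite is done. The variable names here are illustrative only.
//
//	mgr, err := consulacls.New(consulacls.DefaultTFStateFile)
//	require.NoError(t, err)
//	masterToken := mgr.Enable(t) // re-uses an existing token file matching the TF serial
//	defer mgr.Disable(t)
//	_ = masterToken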
type tfManager struct {
serial int
token string
}
func New(tfStateFile string) (*tfManager, error) {
serial, err := extractSerial(tfStateFile)
if err != nil {
return nil, err
}
return &tfManager{
serial: serial,
}, nil
}
func (m *tfManager) Enable(t *testing.T) string {
// Create the local script runner that will be used to run the ACL management
// script, this time with the "enable" sub-command.
var runner provisioning.LinuxRunner
err := runner.Open(t)
require.NoError(t, err)
// Run the consul ACL bootstrap script, which will store the master token
// in the deterministic path based on the TF state serial number. If the
// bootstrap process had already taken place, ACLs will be activated but
// without going through the bootstrap process again, re-using the already
// existing Consul ACL master token.
err = runner.Run(strings.Join([]string{
"consulacls/consul-acls-manage.sh", "enable",
}, " "))
require.NoError(t, err)
// Read the Consul ACL master token that was generated (or if the token
// already existed because the bootstrap process had already taken place,
// that one).
token, err := m.readToken()
require.NoError(t, err)
return token
}
type tfState struct {
Serial int `json:"serial"`
}
// extractSerial will parse the TF state file looking for the serial number.
func extractSerial(filename string) (int, error) {
if filename == "" {
filename = DefaultTFStateFile
}
b, err := ioutil.ReadFile(filename)
if err != nil {
return 0, errors.Wrap(err, "failed to extract TF serial")
}
var state tfState
if err := json.Unmarshal(b, &state); err != nil {
return 0, errors.Wrap(err, "failed to extract TF serial")
}
return state.Serial, nil
}
// tokenPath returns the expected path for the Consul ACL master token generated
// by the consul-acls-manage.sh bootstrap script for the current TF state serial number.
func (m *tfManager) tokenPath() string {
return fmt.Sprintf("/tmp/e2e-consul-bootstrap-%d.token", m.serial)
}
func (m *tfManager) readToken() (string, error) {
b, err := ioutil.ReadFile(m.tokenPath())
if err != nil {
return "", err
}
return strings.TrimSpace(string(b)), nil
}
func (m *tfManager) Disable(t *testing.T) {
// Create the local script runner that will be used to run the ACL management
// script, this time with the "disable" sub-command.
var runner provisioning.LinuxRunner
err := runner.Open(t)
require.NoError(t, err)
// Run the consul ACL bootstrap script, which will modify the Consul Server
// ACL policies to disable ACLs, and then restart those agents.
err = runner.Run(strings.Join([]string{
"consulacls/consul-acls-manage.sh", "disable",
}, " "))
require.NoError(t, err)
}

e2e/consulacls/nomad-client-consul.hcl Normal file

@ -0,0 +1,4 @@
// The provided consul.token value must be blessed with service=write ACLs.
consul {
token="CONSUL_TOKEN"
}

e2e/consulacls/nomad-client-policy.hcl Normal file

@ -0,0 +1,6 @@
// The Nomad Client will be registering things into its buddy Consul Client.
service "" {
policy = "write"
}

e2e/consulacls/nomad-server-consul.hcl Normal file

@ -0,0 +1,8 @@
// Nomad Server needs to set allow_unauthenticated=false to enforce the use
// of a Consul Operator Token on job submission for Connect enabled jobs.
//
// The provided consul.token value must be blessed with acl=write ACLs.
consul {
allow_unauthenticated = false
token="CONSUL_TOKEN"
}

e2e/consulacls/nomad-server-policy.hcl Normal file

@ -0,0 +1,6 @@
// The Nomad Server requires total access to Consul ACLs, because the Server
// will be requesting new SI tokens from Consul.
acl = "write"

e2e/framework/framework.go

@ -187,7 +187,6 @@ func (f *Framework) Run(t *testing.T) {
}
})
}
}
// Run starts the package scoped Framework, running each TestSuite

e2e/framework/provisioning/linux_runner.go

@ -1,25 +1,57 @@
package provisioning
import (
"context"
"fmt"
"log"
"os/exec"
"strings"
"testing"
"time"
"github.com/pkg/errors"
)
// LinuxRunner is a ProvisioningRunner that runs on the executing host only.
// The Nomad configurations used with this runner will need to avoid port
// conflicts!
type LinuxRunner struct{}
//
// Must call Open before other methods.
type LinuxRunner struct {
// populated on Open.
t *testing.T
}
func (runner *LinuxRunner) Open(_ *testing.T) error { return nil }
// Open sets up the LinuxRunner to run using t as a logging mechanism.
func (runner *LinuxRunner) Open(t *testing.T) error {
runner.t = t
return nil
}
func parseCommand(command string) (string, []string) {
fields := strings.Fields(strings.TrimSpace(command))
if len(fields) == 1 {
return fields[0], nil
}
return fields[0], fields[1:]
}
// Run the script (including any arguments)
func (runner *LinuxRunner) Run(script string) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()
commands := strings.Split(script, "\n")
for _, command := range commands {
cmd := exec.Command(strings.TrimSpace(command))
err := cmd.Run()
executable, args := parseCommand(command)
response, err := exec.CommandContext(ctx, executable, args...).CombinedOutput()
// Nothing fancy around separating stdout from stderr, or failed vs
// successful commands for now.
runner.LogOutput(string(response))
if err != nil {
return err
return errors.Wrapf(err, "failed to execute command %q", command)
}
}
return nil
@ -31,3 +63,22 @@ func (runner *LinuxRunner) Copy(local, remote string) error {
}
func (runner *LinuxRunner) Close() {}
func (runner *LinuxRunner) Logf(format string, args ...interface{}) {
if runner.t == nil {
log.Fatal("no t.Testing configured for LinuxRunner")
}
if testing.Verbose() {
fmt.Printf("[local] "+format+"\n", args...)
} else {
runner.t.Logf("[local] "+format, args...)
}
}
func (runner *LinuxRunner) LogOutput(output string) {
if testing.Verbose() {
fmt.Println("\033[32m" + output + "\033[0m")
} else {
runner.t.Log(output)
}
}