open-nomad/e2e/vaultsecrets/vaultsecrets.go

292 lines
8.6 KiB
Go

package vaultsecrets
import (
"context"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"regexp"
"strings"
"time"
e2e "github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/testutil"
)
const ns = ""
type VaultSecretsTest struct {
framework.TC
secretsPath string
pkiPath string
jobIDs []string
policies []string
}
func init() {
framework.AddSuites(&framework.TestSuite{
Component: "VaultSecrets",
CanRunLocal: true,
Consul: true,
Vault: true,
Cases: []framework.TestCase{
new(VaultSecretsTest),
},
})
}
func (tc *VaultSecretsTest) BeforeAll(f *framework.F) {
e2e.WaitForLeader(f.T(), tc.Nomad())
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}
func (tc *VaultSecretsTest) AfterEach(f *framework.F) {
if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
return
}
for _, id := range tc.jobIDs {
_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
f.Assert().NoError(err, "could not clean up job", id)
}
tc.jobIDs = []string{}
for _, policy := range tc.policies {
_, err := e2e.Command("vault", "policy", "delete", policy)
f.Assert().NoError(err, "could not clean up vault policy", policy)
}
tc.policies = []string{}
// disabling the secrets engines will wipe all the secrets as well
_, err := e2e.Command("vault", "secrets", "disable", tc.secretsPath)
f.Assert().NoError(err)
_, err = e2e.Command("vault", "secrets", "disable", tc.pkiPath)
f.Assert().NoError(err)
_, err = e2e.Command("nomad", "system", "gc")
f.NoError(err)
}
func (tc *VaultSecretsTest) TestVaultSecrets(f *framework.F) {
// use a random suffix to encapsulate test keys, polices, etc.
// for cleanup from vault
testID := uuid.Generate()[0:8]
jobID := "test-vault-secrets-" + testID
tc.secretsPath = "secrets-" + testID
tc.pkiPath = "pki-" + testID
secretValue := uuid.Generate()
secretKey := tc.secretsPath + "/data/myapp"
pkiCertIssue := tc.pkiPath + "/issue/nomad"
policyID := "access-secrets-" + testID
index := 0
wc := &e2e.WaitConfig{Retries: 500}
interval, retries := wc.OrDefault()
setupCmds := []string{
// configure KV secrets engine
// Note: the secret key is written to 'secret-###/myapp' but the kv2 API
// for Vault implicitly turns that into 'secret-###/data/myapp' so we
// need to use the longer path for everything other than kv put/get
fmt.Sprintf("vault secrets enable -path=%s kv-v2", tc.secretsPath),
fmt.Sprintf("vault kv put %s/myapp key=%s", tc.secretsPath, secretValue),
fmt.Sprintf("vault secrets tune -max-lease-ttl=1m %s", tc.secretsPath),
// configure PKI secrets engine
fmt.Sprintf("vault secrets enable -path=%s pki", tc.pkiPath),
fmt.Sprintf("vault write %s/root/generate/internal "+
"common_name=service.consul ttl=1h", tc.pkiPath),
fmt.Sprintf("vault write %s/roles/nomad "+
"allowed_domains=service.consul "+
"allow_subdomains=true "+
"generate_lease=true "+
"max_ttl=1m", tc.pkiPath),
fmt.Sprintf("vault secrets tune -max-lease-ttl=1m %s", tc.pkiPath),
}
for _, setupCmd := range setupCmds {
cmd := strings.Split(setupCmd, " ")
out, err := e2e.Command(cmd[0], cmd[1:]...)
f.NoError(err, fmt.Sprintf("error for %q:\n%s", setupCmd, out))
}
// we can't set an empty policy in our job, so write a bogus policy that
// doesn't have access to any of the paths we're using
out, err := writePolicy(policyID, "./vaultsecrets/input/policy-bad.hcl", testID)
f.NoError(err, out)
tc.policies = append(tc.policies, policyID)
index++
err = runJob(jobID, testID, index)
f.NoError(err, "could not register job")
tc.jobIDs = append(tc.jobIDs, jobID)
// job doesn't have access to secrets, so they can't start
err = e2e.WaitForAllocStatusExpected(jobID, ns, []string{"pending"})
f.NoError(err, "expected pending allocation")
// we should get a task event about why they can't start
expect := fmt.Sprintf("Missing: vault.read(%s), vault.write(%s", secretKey, pkiCertIssue)
allocID, err := latestAllocID(jobID)
f.NoError(err)
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
out, err := e2e.Command("nomad", "alloc", "status", allocID)
f.NoError(err, "could not get allocation status")
return strings.Contains(out, expect),
fmt.Errorf("expected '%s', got\n%v", expect, out)
}, func(e error) {
f.NoError(e)
})
// write a working policy and redeploy
out, err = writePolicy(policyID, "./vaultsecrets/input/policy-good.hcl", testID)
f.NoError(err, out)
index++
err = runJob(jobID, testID, index)
f.NoError(err, "could not register job")
// record the rough start of vault token TTL window, so that we don't have
// to wait excessively later on
ttlStart := time.Now()
// job should be now unblocked
err = e2e.WaitForAllocStatusExpected(jobID, ns, []string{"running", "complete"})
f.NoError(err, "expected running allocation")
allocID, err = latestAllocID(jobID)
f.NoError(err)
renderedCert, err := waitForAllocSecret(allocID, "task", "/secrets/certificate.crt",
func(out string) bool {
return strings.Contains(out, "BEGIN CERTIFICATE")
}, wc)
f.NoError(err)
_, err = waitForAllocSecret(allocID, "task", "/secrets/access.key",
func(out string) bool {
return strings.Contains(out, secretValue)
}, wc)
f.NoError(err)
var re = regexp.MustCompile(`VAULT_TOKEN=(.*)`)
// check vault token was written and save it for later comparison
out, err = e2e.AllocExec(allocID, "task", "env", ns, nil)
f.NoError(err)
match := re.FindStringSubmatch(out)
f.NotNil(match, fmt.Errorf("could not find VAULT_TOKEN, got:%v\n", out))
taskToken := match[1]
// Update secret
out, err = e2e.Command("vault", "kv", "put",
fmt.Sprintf("%s/myapp", tc.secretsPath), "key=UPDATED")
f.NoError(err, out)
elapsed := time.Since(ttlStart)
time.Sleep((time.Second * 60) - elapsed)
// tokens will not be updated
out, err = e2e.AllocExec(allocID, "task", "env", ns, nil)
f.NoError(err)
match = re.FindStringSubmatch(out)
f.NotNil(match, fmt.Errorf("could not find VAULT_TOKEN, got:%v\n", out))
f.Equal(taskToken, match[1])
// cert will be renewed
_, err = waitForAllocSecret(allocID, "task", "/secrets/certificate.crt",
func(out string) bool {
return strings.Contains(out, "BEGIN CERTIFICATE") &&
out != renderedCert
}, wc)
f.NoError(err)
// secret will *not* be renewed because it doesn't have a lease to expire
_, err = waitForAllocSecret(allocID, "task", "/secrets/access.key",
func(out string) bool {
return strings.Contains(out, secretValue)
}, wc)
f.NoError(err)
}
// We need to namespace the keys in the policy, so read it in and replace the
// values of the policy names
func writePolicy(policyID, policyPath, testID string) (string, error) {
raw, err := ioutil.ReadFile(policyPath)
if err != nil {
return "", err
}
policyDoc := string(raw)
policyDoc = strings.ReplaceAll(policyDoc, "TESTID", testID)
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
defer cancel()
cmd := exec.CommandContext(ctx, "vault", "policy", "write", policyID, "-")
stdin, err := cmd.StdinPipe()
if err != nil {
return "", err
}
go func() {
defer stdin.Close()
io.WriteString(stdin, policyDoc)
}()
out, err := cmd.CombinedOutput()
return string(out), err
}
// We need to namespace the vault paths in the job, so parse it
// and replace the values of the template and vault fields
func runJob(jobID, testID string, index int) error {
raw, err := ioutil.ReadFile("./vaultsecrets/input/secrets.nomad")
if err != nil {
return err
}
jobspec := string(raw)
jobspec = strings.ReplaceAll(jobspec, "TESTID", testID)
jobspec = strings.ReplaceAll(jobspec, "DEPLOYNUMBER", string(rune(index)))
return e2e.RegisterFromJobspec(jobID, jobspec)
}
// waitForAllocSecret is similar to e2e.WaitForAllocFile but uses `alloc exec`
// to be able to read the secrets dir, which is not available to `alloc fs`
func waitForAllocSecret(allocID, taskID, path string, test func(string) bool, wc *e2e.WaitConfig) (string, error) {
var err error
var out string
interval, retries := wc.OrDefault()
testutil.WaitForResultRetries(retries, func() (bool, error) {
time.Sleep(interval)
out, err = e2e.Command("nomad", "alloc", "exec", "-task", taskID, allocID, "cat", path)
if err != nil {
return false, fmt.Errorf("could not get file %q from allocation %q: %v",
path, allocID, err)
}
return test(out),
fmt.Errorf("test for file content failed: got\n%#v", out)
}, func(e error) {
err = e
})
return out, err
}
// this will always be sorted
func latestAllocID(jobID string) (string, error) {
allocs, err := e2e.AllocsForJob(jobID, ns)
if err != nil {
return "", err
}
return allocs[0]["ID"], nil
}