open-nomad/e2e/consul/script_checks.go
Seth Hoenig 7f1191111d e2e: add tests for consul namespaces from nomad oss
This PR adds a set of tests to the Consul test suite for testing
Nomad OSS's behavior around setting Consul Namespace on groups,
which is to ignore the setting (as Consul Namespaces are currently
an Enterprise feature).

Tests are generally a reduced facsimile of existing tests, modified
to check behavior of when group.consul.namespace is set and not set.
Verification is oriented around what happens in Consul; the in-depth
functional correctness of these features is left to the original tests.

Nomad ENT will get its own version of these tests in `namespaces_ent.go`.
2021-04-16 15:32:37 -06:00
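
The verification pattern those namespace tests follow mirrors the script-check tests in this file: register a job, then assert on what Consul reports. As a rough, hypothetical sketch (not part of this file), a group that sets `consul.namespace` under Nomad OSS should still have its services appear under the suite's default Consul namespace; the jobspec path and service name below are made up for illustration:

// Hypothetical sketch: verify that Nomad OSS ignores group.consul.namespace.
// The jobspec path and service name are illustrative, not real fixtures.
func (tc *ScriptChecksE2ETest) testNamespaceIgnoredOSS(f *framework.F) {
	r := require.New(f.T())
	jobId := "checks_ns" + uuid.Short()
	tc.jobIds = append(tc.jobIds, jobId)

	// Register a job whose group sets consul.namespace in its jobspec.
	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
		tc.Nomad(), "consul/input/checks_group_namespace.nomad", jobId, "")
	r.Equal(1, len(allocs))

	// Namespaces are Consul Enterprise only, so OSS is expected to register
	// the service under the suite's default consulNamespace regardless.
	e2eutil.RequireConsulStatus(r, tc.Consul(), consulNamespace,
		"group-service-ns", capi.HealthPassing)
}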

package consul

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"strings"
	"time"

	capi "github.com/hashicorp/consul/api"
	napi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/stretchr/testify/require"
)

type ScriptChecksE2ETest struct {
	framework.TC
	jobIds []string
}

func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	// Ensure that we have at least 1 client node in ready state
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

// TestGroupScriptCheck runs a job with a single task group with several services
// and associated script checks. It updates, stops, etc. the job to verify
// that script checks are re-registered as expected.
func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) {
	r := require.New(f.T())
	nomadClient := tc.Nomad()
	consulClient := tc.Consul()

	jobId := "checks_group" + uuid.Short()
	tc.jobIds = append(tc.jobIds, jobId)

	// Job run: verify that checks were registered in Consul
	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Check in warning state becomes healthy after check passes
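	// (Assumption: the jobspec's script check for group-service-2 looks for
	// this sentinel file, so creating it flips the check from warning to passing.)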
	_, _, err := exec(nomadClient, allocs,
		[]string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"})
	r.NoError(err)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)

	// Job update: verify checks are re-registered in Consul
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group_update.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Verify we don't have any lingering script checks running on the client
	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
	r.NoError(err)
	running := strings.Split(strings.TrimSpace(out.String()), "\n")
	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2

	// Clean job stop: verify that checks were deregistered in Consul
	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
	r.NoError(err)
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-1")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-2")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-3")

	// Restore for next test
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group.nomad", jobId, "")
	r.Equal(2, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Crash a task: verify that checks become healthy again
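	// (Note: pkill takes down the process we exec into, so the exec stream may
	// fail with "plugin is shut down"; that error is tolerated below.)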
	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
	if err != nil && err.Error() != "plugin is shut down" {
		r.FailNow("unexpected error: %v", err)
	}
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// TODO(tgross) ...
	// Restart client: verify that checks are re-registered
}

// TestTaskScriptCheck runs a job with a single task with several services
// and associated script checks. It updates, stops, etc. the job to verify
// that script checks are re-registered as expected.
func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) {
	r := require.New(f.T())
	nomadClient := tc.Nomad()
	consulClient := tc.Consul()

	jobId := "checks_task" + uuid.Short()
	tc.jobIds = append(tc.jobIds, jobId)

	// Job run: verify that checks were registered in Consul
	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Check in warning state becomes healthy after check passes
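	// (Assumption: the jobspec's script check for task-service-2 looks for
	// this sentinel file in the task directory, so creating it flips the
	// check from warning to passing.)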
	_, _, err := exec(nomadClient, allocs,
		[]string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"})
	r.NoError(err)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)

	// Job update: verify checks are re-registered in Consul
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task_update.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Verify we don't have any lingering script checks running on the client
	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
	r.NoError(err)
	running := strings.Split(strings.TrimSpace(out.String()), "\n")
	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2

	// Clean job stop: verify that checks were deregistered in Consul
	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
	r.NoError(err)
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-1")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-2")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-3")

	// Restore for next test
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task.nomad", jobId, "")
	r.Equal(2, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Crash a task: verify that checks become healthy again
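	// (Note: pkill takes down the process we exec into, so the exec stream may
	// fail with "plugin is shut down"; that error is tolerated below.)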
	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
	if err != nil && err.Error() != "plugin is shut down" {
		r.FailNow("unexpected error: %v", err)
	}
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// TODO(tgross) ...
	// Restart client: verify that checks are re-registered
}

func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) {
	r := require.New(f.T())
	nomadClient := tc.Nomad()
	jobs := nomadClient.Jobs()

	// Stop all jobs in test
	for _, id := range tc.jobIds {
		_, _, err := jobs.Deregister(id, true, nil)
		r.NoError(err)
	}

	// Garbage collect
	r.NoError(nomadClient.System().GarbageCollect())
}

func exec(client *napi.Client, allocs []*napi.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) {
	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelFn()

	// We're getting a list of allocs from the registration call here, but
	// one of them might be stopped or stopping, which will return an error
	// if we try to exec into it.
	var alloc *napi.Allocation
	for _, stub := range allocs {
		if stub.DesiredStatus == "run" {
			alloc = &napi.Allocation{
				ID:        stub.ID,
				Namespace: stub.Namespace,
				NodeID:    stub.NodeID,
			}
		}
	}

	var stdout, stderr bytes.Buffer
	if alloc == nil {
		return stdout, stderr, fmt.Errorf("no allocation ready for exec")
	}
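
	// Exec into the task named "test" in the chosen allocation, without a TTY.
	// The empty TerminalSize channel satisfies the API; no resize events are sent.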
	_, err := client.Allocations().Exec(ctx,
		alloc, "test", false,
		command,
		os.Stdin, &stdout, &stderr,
		make(chan napi.TerminalSize), nil)
	return stdout, stderr, err
}