client: add support for checks in nomad services

This PR adds support for specifying checks in services registered to
the built-in nomad service provider.

Currently only HTTP and TCP checks are supported, though more types
could be added later.
Seth Hoenig 2022-06-07 09:18:19 -05:00
parent 2f444a43f0
commit 297d386bdc
46 changed files with 3626 additions and 330 deletions
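For context, a minimal sketch of what this change enables: a service registered with the built-in nomad provider that carries one HTTP and one TCP check, built from the same structs the new tests construct. The service name, check names, intervals, and timeouts below are illustrative assumptions, not values taken from this commit.

package main

import (
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

// exampleNomadService sketches a group-level service that uses the built-in
// nomad provider with one HTTP check and one TCP check.
func exampleNomadService() *structs.Service {
	return &structs.Service{
		Name:      "web",   // illustrative service name
		Provider:  "nomad", // the built-in nomad service provider
		PortLabel: "http",
		Checks: []*structs.ServiceCheck{
			{
				Name:     "web-http-ok", // hypothetical check name
				Type:     "http",
				Path:     "/",
				Protocol: "http",
				Method:   "GET",
				Interval: 10 * time.Second,
				Timeout:  2 * time.Second,
			},
			{
				Name:     "web-tcp-ok", // hypothetical check name
				Type:     "tcp",
				Interval: 10 * time.Second,
				Timeout:  2 * time.Second,
			},
		},
	}
}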

View File

@ -8,9 +8,8 @@ import (
"io"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/acl"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper"
@ -137,6 +136,29 @@ func (a *Allocations) Stats(args *cstructs.AllocStatsRequest, reply *cstructs.Al
return nil
}
// Checks is used to retrieve nomad service discovery check status information.
func (a *Allocations) Checks(args *cstructs.AllocChecksRequest, reply *cstructs.AllocChecksResponse) error {
defer metrics.MeasureSince([]string{"client", "allocations", "checks"}, time.Now())
// Get the allocation
alloc, err := a.c.GetAlloc(args.AllocID)
if err != nil {
return err
}
// Check read-job permission
if aclObj, aclErr := a.c.ResolveToken(args.AuthToken); aclErr != nil {
return aclErr
} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityReadJob) {
return nstructs.ErrPermissionDenied
}
// Get the status information for the allocation
reply.Results = a.c.checkStore.List(alloc.ID)
return nil
}
// exec is used to execute a command in a running task
func (a *Allocations) exec(conn io.ReadWriteCloser) {
defer metrics.MeasureSince([]string{"client", "allocations", "exec"}, time.Now())

View File

@ -23,6 +23,7 @@ import (
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)
@ -529,6 +530,92 @@ func TestAllocations_Stats_ACL(t *testing.T) {
}
}
func TestAlloc_Checks(t *testing.T) {
ci.Parallel(t)
client, cleanup := TestClient(t, nil)
t.Cleanup(func() {
must.NoError(t, cleanup())
})
now := time.Date(2022, 3, 4, 5, 6, 7, 8, time.UTC).Unix()
qr1 := &nstructs.CheckQueryResult{
ID: "abc123",
Mode: "healthiness",
Status: "passing",
Output: "nomad: http ok",
Timestamp: now,
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
qr2 := &nstructs.CheckQueryResult{
ID: "def456",
Mode: "readiness",
Status: "passing",
Output: "nomad: http ok",
Timestamp: now,
Group: "group",
Service: "service2",
Check: "check",
}
t.Run("alloc does not exist", func(t *testing.T) {
request := cstructs.AllocChecksRequest{AllocID: "d3e34248-4843-be75-d4fd-4899975cfb38"}
var response cstructs.AllocChecksResponse
err := client.ClientRPC("Allocations.Checks", &request, &response)
must.EqError(t, err, `Unknown allocation "d3e34248-4843-be75-d4fd-4899975cfb38"`)
})
t.Run("no checks for alloc", func(t *testing.T) {
alloc := mock.Alloc()
must.NoError(t, client.addAlloc(alloc, ""))
request := cstructs.AllocChecksRequest{AllocID: alloc.ID}
var response cstructs.AllocChecksResponse
err := client.ClientRPC("Allocations.Checks", &request, &response)
must.NoError(t, err)
must.MapEmpty(t, response.Results)
})
t.Run("two in one alloc", func(t *testing.T) {
alloc := mock.Alloc()
must.NoError(t, client.addAlloc(alloc, ""))
must.NoError(t, client.checkStore.Set(alloc.ID, qr1))
must.NoError(t, client.checkStore.Set(alloc.ID, qr2))
request := cstructs.AllocChecksRequest{AllocID: alloc.ID}
var response cstructs.AllocChecksResponse
err := client.ClientRPC("Allocations.Checks", &request, &response)
must.NoError(t, err)
must.MapEq(t, map[nstructs.CheckID]*nstructs.CheckQueryResult{
"abc123": qr1,
"def456": qr2,
}, response.Results)
})
t.Run("ignore unrelated alloc", func(t *testing.T) {
alloc1 := mock.Alloc()
must.NoError(t, client.addAlloc(alloc1, ""))
alloc2 := mock.Alloc()
must.NoError(t, client.addAlloc(alloc2, ""))
must.NoError(t, client.checkStore.Set(alloc1.ID, qr1))
must.NoError(t, client.checkStore.Set(alloc2.ID, qr2))
request := cstructs.AllocChecksRequest{AllocID: alloc1.ID}
var response cstructs.AllocChecksResponse
err := client.ClientRPC("Allocations.Checks", &request, &response)
must.NoError(t, err)
must.MapEq(t, map[nstructs.CheckID]*nstructs.CheckQueryResult{
"abc123": qr1,
}, response.Results)
})
}
func TestAlloc_ExecStreaming(t *testing.T) {
ci.Parallel(t)
require := require.New(t)

View File

@ -8,9 +8,11 @@ import (
"time"
"github.com/hashicorp/consul/api"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -18,9 +20,9 @@ const (
// AllocHealthEventSource is the source used for emitting task events
AllocHealthEventSource = "Alloc Unhealthy"
// consulCheckLookupInterval is the interval at which we check if the
// Consul checks are healthy or unhealthy.
consulCheckLookupInterval = 500 * time.Millisecond
// checkLookupInterval is the pace at which we check if the Consul or Nomad
// checks for an allocation are healthy or unhealthy.
checkLookupInterval = 500 * time.Millisecond
)
// Tracker tracks the health of an allocation and makes health events watchable
@ -40,23 +42,30 @@ type Tracker struct {
// considered healthy
minHealthyTime time.Duration
// checkLookupInterval is the interval at which we check if the
// Consul checks are healthy or unhealthy.
// checkLookupInterval is the repeated interval after which we check
// if the Consul or Nomad checks are healthy or unhealthy.
checkLookupInterval time.Duration
// useChecks specifies whether to use Consul health checks or not
// useChecks specifies whether to consider Consul and Nomad service checks.
useChecks bool
// consulCheckCount is the number of checks the task group will attempt to
// register
// consulCheckCount is the total number of Consul service checks in the task
// group including task level checks.
consulCheckCount int
// nomadCheckCount is the total number of Nomad service checks in the task
// group including task level checks.
nomadCheckCount int
// allocUpdates is a listener for retrieving new alloc updates
allocUpdates *cstructs.AllocListener
// consulClient is used to look up the state of the task's checks
// consulClient is used to look up the status of Consul service checks
consulClient serviceregistration.Handler
// checkStore is used to lookup the status of Nomad service checks
checkStore checkstore.Shim
// healthy is used to signal whether we have determined the allocation to be
// healthy or unhealthy
healthy chan bool
@ -69,11 +78,11 @@ type Tracker struct {
// These tasks may terminate without affecting alloc health
lifecycleTasks map[string]string
// l is used to lock shared fields listed below
l sync.Mutex
// lock is used to lock shared fields listed below
lock sync.Mutex
// tasksHealthy marks whether all the tasks have met their health check
// (disregards Consul)
// (disregards Consul and Nomad checks)
tasksHealthy bool
// allocFailed marks whether the allocation failed
@ -82,22 +91,31 @@ type Tracker struct {
// checksHealthy marks whether all the task's Consul checks are healthy
checksHealthy bool
// taskHealth contains the health state for each task
// taskHealth contains the health state for each task in the allocation
// name -> state
taskHealth map[string]*taskHealthState
// logger is for logging things
logger hclog.Logger
}
// NewTracker returns a health tracker for the given allocation. An alloc
// listener and consul API object are given so that the watcher can detect
// health changes.
func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.Allocation,
allocUpdates *cstructs.AllocListener, consulClient serviceregistration.Handler,
minHealthyTime time.Duration, useChecks bool) *Tracker {
// NewTracker returns a health tracker for the given allocation.
//
// Depending on job configuration, an allocation's health takes into consideration
// - An alloc listener
// - Consul checks (via consul API)
// - Nomad checks (via client state)
func NewTracker(
parentCtx context.Context,
logger hclog.Logger,
alloc *structs.Allocation,
allocUpdates *cstructs.AllocListener,
consulClient serviceregistration.Handler,
checkStore checkstore.Shim,
minHealthyTime time.Duration,
useChecks bool,
) *Tracker {
// Do not create a named sub-logger as the hook controlling
// this struct should pass in an appropriately named
// sub-logger.
t := &Tracker{
healthy: make(chan bool, 1),
allocStopped: make(chan struct{}),
@ -107,7 +125,8 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A
useChecks: useChecks,
allocUpdates: allocUpdates,
consulClient: consulClient,
checkLookupInterval: consulCheckLookupInterval,
checkStore: checkStore,
checkLookupInterval: checkLookupInterval,
logger: logger,
lifecycleTasks: map[string]string{},
}
@ -120,24 +139,42 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A
t.lifecycleTasks[task.Name] = task.Lifecycle.Hook
}
for _, s := range task.Services {
t.consulCheckCount += len(s.Checks)
}
c, n := countChecks(task.Services)
t.consulCheckCount += c
t.nomadCheckCount += n
}
for _, s := range t.tg.Services {
t.consulCheckCount += len(s.Checks)
}
c, n := countChecks(t.tg.Services)
t.consulCheckCount += c
t.nomadCheckCount += n
t.ctx, t.cancelFn = context.WithCancel(parentCtx)
return t
}
func countChecks(services []*structs.Service) (consul, nomad int) {
for _, service := range services {
switch service.Provider {
case structs.ServiceProviderNomad:
nomad += len(service.Checks)
default:
consul += len(service.Checks)
}
}
return
}
// Start starts the watcher.
func (t *Tracker) Start() {
go t.watchTaskEvents()
if t.useChecks {
switch {
case !t.useChecks:
return
case t.consulCheckCount > 0:
go t.watchConsulEvents()
case t.nomadCheckCount > 0:
go t.watchNomadEvents()
}
}
@ -157,8 +194,8 @@ func (t *Tracker) AllocStoppedCh() <-chan struct{} {
// health has been determined. Only tasks that have contributed to the
// allocation being unhealthy will have an event.
func (t *Tracker) TaskEvents() map[string]*structs.TaskEvent {
t.l.Lock()
defer t.l.Unlock()
t.lock.Lock()
defer t.lock.Unlock()
// Nothing to do since the failure wasn't task related
if t.allocFailed {
@ -180,10 +217,11 @@ func (t *Tracker) TaskEvents() map[string]*structs.TaskEvent {
}
// setTaskHealth is used to set the tasks health as healthy or unhealthy. If the
// allocation is terminal, health is immediately broadcasted.
// allocation is terminal, health is immediately broadcast.
func (t *Tracker) setTaskHealth(healthy, terminal bool) {
t.l.Lock()
defer t.l.Unlock()
t.lock.Lock()
defer t.lock.Unlock()
t.tasksHealthy = healthy
// if unhealthy, force waiting for new checks health status
@ -192,15 +230,23 @@ func (t *Tracker) setTaskHealth(healthy, terminal bool) {
return
}
// If we are marked healthy but we also require Consul to be healthy and it
// isn't yet, return, unless the task is terminal
requireConsul := t.useChecks && t.consulCheckCount > 0
if !terminal && healthy && requireConsul && !t.checksHealthy {
// If we are marked healthy but we also require Consul checks to be healthy
// and they are not yet, return, unless the task is terminal.
usesConsulChecks := t.useChecks && t.consulCheckCount > 0
if !terminal && healthy && usesConsulChecks && !t.checksHealthy {
return
}
// If we are marked healthy but also require Nomad checks to be healthy and
// they are not yet, return, unless the task is terminal.
usesNomadChecks := t.useChecks && t.nomadCheckCount > 0
if !terminal && healthy && usesNomadChecks && !t.checksHealthy {
return
}
select {
case t.healthy <- healthy:
// nothing
default:
}
@ -210,9 +256,13 @@ func (t *Tracker) setTaskHealth(healthy, terminal bool) {
// setCheckHealth is used to mark the checks as either healthy or unhealthy.
// returns true if health is propagated and no more health monitoring is needed
//
// todo: this is currently being shared by watchConsulEvents and watchNomadEvents,
// and must be split up if/when we support registering services (and thus checks)
// of different providers.
func (t *Tracker) setCheckHealth(healthy bool) bool {
t.l.Lock()
defer t.l.Unlock()
t.lock.Lock()
defer t.lock.Unlock()
// check health should always be false if tasks are unhealthy
// as checks might be missing from unhealthy tasks
@ -225,10 +275,11 @@ func (t *Tracker) setCheckHealth(healthy bool) bool {
select {
case t.healthy <- healthy:
// nothing
default:
}
// Shutdown the tracker
// Shut down the tracker, checks are healthy so there is nothing left to monitor
t.cancelFn()
return true
}
@ -244,13 +295,8 @@ func (t *Tracker) markAllocStopped() {
func (t *Tracker) watchTaskEvents() {
alloc := t.alloc
allStartedTime := time.Time{}
healthyTimer := time.NewTimer(0)
if !healthyTimer.Stop() {
select {
case <-healthyTimer.C:
default:
}
}
waiter := newHealthyFuture()
for {
// If the alloc is being stopped by the server just exit
@ -262,7 +308,7 @@ func (t *Tracker) watchTaskEvents() {
}
// Store the task states
t.l.Lock()
t.lock.Lock()
for task, state := range alloc.TaskStates {
//TODO(schmichael) for now skip unknown tasks as
//they're task group services which don't currently
@ -271,7 +317,7 @@ func (t *Tracker) watchTaskEvents() {
v.state = state
}
}
t.l.Unlock()
t.lock.Unlock()
// Detect if the alloc is unhealthy or if all tasks have started yet
latestStartTime := time.Time{}
@ -308,9 +354,9 @@ func (t *Tracker) watchTaskEvents() {
// individual tasks failed, that means something failed at the alloc
// level.
if alloc.ClientStatus == structs.AllocClientStatusFailed {
t.l.Lock()
t.lock.Lock()
t.allocFailed = true
t.l.Unlock()
t.lock.Unlock()
t.setTaskHealth(false, true)
return
@ -321,17 +367,12 @@ func (t *Tracker) watchTaskEvents() {
t.setTaskHealth(false, false)
// Prevent the timer from firing at the old start time
if !healthyTimer.Stop() {
select {
case <-healthyTimer.C:
default:
}
}
waiter.disable()
// Set the timer since all tasks are started
if !latestStartTime.IsZero() {
allStartedTime = latestStartTime
healthyTimer.Reset(t.minHealthyTime)
waiter.wait(t.minHealthyTime)
}
}
@ -343,33 +384,66 @@ func (t *Tracker) watchTaskEvents() {
return
}
alloc = newAlloc
case <-healthyTimer.C:
case <-waiter.C():
t.setTaskHealth(true, false)
}
}
}
// watchConsulEvents is a watcher for the health of the allocation's Consul
// checks. If all checks report healthy the watcher will exit after the
// MinHealthyTime has been reached, Otherwise the watcher will continue to
// check unhealthy checks until the ctx is cancelled
func (t *Tracker) watchConsulEvents() {
// checkTicker is the ticker that triggers us to look at the checks in
// Consul
checkTicker := time.NewTicker(t.checkLookupInterval)
defer checkTicker.Stop()
// healthyFuture is used to fire after checks have been healthy for MinHealthyTime
type healthyFuture struct {
timer *time.Timer
}
// healthyTimer fires when the checks have been healthy for the
// MinHealthyTime
healthyTimer := time.NewTimer(0)
if !healthyTimer.Stop() {
// newHealthyFuture will create a healthyFuture in a disabled state, and
// will do nothing until a call to wait takes place
func newHealthyFuture() *healthyFuture {
timer := time.NewTimer(0)
ht := &healthyFuture{timer: timer}
ht.disable()
return ht
}
// disable prevents the healthyFuture from triggering
func (h *healthyFuture) disable() {
if !h.timer.Stop() {
// must ensure channel is clear
// https://pkg.go.dev/time#Timer.Stop
select {
case <-healthyTimer.C:
case <-h.timer.C:
default:
}
}
}
// primed marks whether the healthy timer has been set
// wait will reset the healthyFuture to trigger after dur passes.
func (h *healthyFuture) wait(dur time.Duration) {
// must ensure timer is stopped
// https://pkg.go.dev/time#Timer.Reset
h.disable()
h.timer.Reset(dur)
}
// C returns a channel on which the future will send when ready.
func (h *healthyFuture) C() <-chan time.Time {
return h.timer.C
}
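To make the healthyFuture API concrete, here is a usage sketch following the pattern the watchers below apply: arm the future with wait, select on C, and call disable if the condition regresses or the context ends. The helper function and its parameters are illustrative assumptions, not part of this commit.

// waitForSustainedHealth is a sketch (not part of this change) of the
// healthyFuture pattern: it reports whether the healthy period lasted for
// minHealthyTime before ctx was cancelled.
func waitForSustainedHealth(ctx context.Context, minHealthyTime time.Duration) bool {
	waiter := newHealthyFuture()
	waiter.wait(minHealthyTime) // fire once health has been sustained this long
	select {
	case <-waiter.C():
		return true // the healthy period elapsed without interruption
	case <-ctx.Done():
		waiter.disable() // shutting down; clear the pending timer
		return false
	}
}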
// watchConsulEvents is a watcher for the health of the allocation's Consul
// checks. If all checks report healthy the watcher will exit after the
// MinHealthyTime has been reached, otherwise the watcher will continue to
// check unhealthy checks until the ctx is cancelled.
//
// Does not watch Nomad service checks; see watchNomadEvents for those.
func (t *Tracker) watchConsulEvents() {
// checkTicker is the ticker that triggers us to look at the checks in Consul
checkTicker := time.NewTicker(t.checkLookupInterval)
defer checkTicker.Stop()
// waiter is used to fire when the checks have been healthy for the MinHealthyTime
waiter := newHealthyFuture()
// primed marks whether the healthy waiter has been set
primed := false
// Store whether the last Consul checks call was successful or not
@ -381,8 +455,12 @@ func (t *Tracker) watchConsulEvents() {
OUTER:
for {
select {
// we are shutting down
case <-t.ctx.Done():
return
// it is time to check the checks
case <-checkTicker.C:
newAllocReg, err := t.consulClient.AllocRegistrations(t.alloc.ID)
if err != nil {
@ -395,12 +473,15 @@ OUTER:
consulChecksErr = false
allocReg = newAllocReg
}
case <-healthyTimer.C:
// enough time has passed with healthy checks
case <-waiter.C():
if t.setCheckHealth(true) {
// final health set and propagated
return
}
// tasks are unhealthy, reset and wait until all is healthy
// checks are healthy but tasks are unhealthy,
// reset and wait until all is healthy
primed = false
}
@ -409,7 +490,7 @@ OUTER:
}
// Store the task registrations
t.l.Lock()
t.lock.Lock()
for task, reg := range allocReg.Tasks {
//TODO(schmichael) for now skip unknown tasks as
//they're task group services which don't currently
@ -418,7 +499,7 @@ OUTER:
v.taskRegistrations = reg
}
}
t.l.Unlock()
t.lock.Unlock()
// Detect if all the checks are passing
passed := true
@ -427,16 +508,16 @@ OUTER:
for _, treg := range allocReg.Tasks {
for _, sreg := range treg.Services {
for _, check := range sreg.Checks {
onupdate := sreg.CheckOnUpdate[check.CheckID]
onUpdate := sreg.CheckOnUpdate[check.CheckID]
switch check.Status {
case api.HealthPassing:
continue
case api.HealthWarning:
if onupdate == structs.OnUpdateIgnoreWarn || onupdate == structs.OnUpdateIgnore {
if onUpdate == structs.OnUpdateIgnoreWarn || onUpdate == structs.OnUpdateIgnore {
continue
}
case api.HealthCritical:
if onupdate == structs.OnUpdateIgnore {
if onUpdate == structs.OnUpdateIgnore {
continue
}
default:
@ -452,25 +533,94 @@ OUTER:
if !passed {
// Reset the timer since we have transitioned back to unhealthy
if primed {
if !healthyTimer.Stop() {
select {
case <-healthyTimer.C:
default:
}
}
primed = false
waiter.disable()
}
} else if !primed {
// Reset the timer to fire after MinHealthyTime
if !healthyTimer.Stop() {
select {
case <-healthyTimer.C:
default:
primed = true
waiter.disable()
waiter.wait(t.minHealthyTime)
}
}
}
// watchNomadEvents is a watcher for the health of the allocation's Nomad checks.
// If all checks report healthy the watcher will exit after the MinHealthyTime has
// been reached, otherwise the watcher will continue to check unhealthy checks until
// the ctx is cancelled.
//
// Does not watch Consul service checks; see watchConsulEvents for those.
func (t *Tracker) watchNomadEvents() {
// checkTicker is the ticker that triggers us to look at the checks in Nomad
checkTicker, cancel := helper.NewSafeTimer(t.checkLookupInterval)
defer cancel()
// waiter is used to fire when the checks have been healthy for the MinHealthyTime
waiter := newHealthyFuture()
// allocID of the allocation we are watching checks for
allocID := t.alloc.ID
// primed marks whether the healthy waiter has been set
primed := false
// latest set of nomad check results
var results map[structs.CheckID]*structs.CheckQueryResult
for {
select {
// we are shutting down
case <-t.ctx.Done():
return
// it is time to check the checks
case <-checkTicker.C:
results = t.checkStore.List(allocID)
checkTicker.Reset(t.checkLookupInterval)
// enough time has passed with healthy checks
case <-waiter.C():
if t.setCheckHealth(true) { // todo(shoenig) this needs to be split between Consul and Nomad
return // final health set and propagated
}
// checks are healthy but tasks are unhealthy, reset and wait
primed = false
}
// scan to see if any checks are failing
passing := true
for _, result := range results {
switch result.Status {
case structs.CheckSuccess:
continue
case structs.CheckFailure:
if result.Mode == structs.Readiness {
continue
}
passing = false
default:
// i.e. pending check; do not consider healthy or ready
passing = false
}
if !passing {
break // 1+ check is failing; no need to continue
}
}
if !passing {
// at least one check is failing, transition to unhealthy
t.setCheckHealth(false)
primed = false
waiter.disable()
}
if passing && !primed {
// healthy but not yet primed, set timer to wait
primed = true
healthyTimer.Reset(t.minHealthyTime)
waiter.wait(t.minHealthyTime)
}
}
}
@ -488,15 +638,13 @@ type taskHealthState struct {
// strategy of the group. It returns true if the task has contributed to the
// allocation being unhealthy and if so, an event description of why.
func (t *taskHealthState) event(deadline time.Time, healthyDeadline, minHealthyTime time.Duration, useChecks bool) (string, bool) {
requireChecks := false
desiredChecks := 0
for _, s := range t.task.Services {
if nc := len(s.Checks); nc > 0 {
requireChecks = true
desiredChecks += nc
}
}
requireChecks = requireChecks && useChecks
requireChecks := (desiredChecks > 0) && useChecks
if t.state != nil {
if t.state.Failed {
@ -507,7 +655,7 @@ func (t *taskHealthState) event(deadline time.Time, healthyDeadline, minHealthyT
case structs.TaskStatePending:
return fmt.Sprintf("Task not running by healthy_deadline of %v", healthyDeadline), true
case structs.TaskStateDead:
// hook tasks are healthy when dead successfully
// non-sidecar hook lifecycle tasks are healthy if they exit with success
if t.task.Lifecycle == nil || t.task.Lifecycle.Sidecar {
return "Unhealthy because of dead task", true
}

View File

@ -10,16 +10,19 @@ import (
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/serviceregistration"
regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
regmock "github.com/hashicorp/nomad/client/serviceregistration/mock"
"github.com/hashicorp/nomad/client/state"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
func TestTracker_Checks_Healthy(t *testing.T) {
func TestTracker_ConsulChecks_Healthy(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
@ -60,7 +63,7 @@ func TestTracker_Checks_Healthy(t *testing.T) {
// Don't reply on the first call
var called uint64
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
if atomic.AddUint64(&called, 1) == 1 {
return nil, nil
@ -76,9 +79,9 @@ func TestTracker_Checks_Healthy(t *testing.T) {
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -90,6 +93,143 @@ func TestTracker_Checks_Healthy(t *testing.T) {
}
}
func TestTracker_NomadChecks_Healthy(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"
logger := testlog.HCLogger(t)
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Synthesize running alloc and tasks
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.TaskStates = map[string]*structs.TaskState{
alloc.Job.TaskGroups[0].Tasks[0].Name: {
State: structs.TaskStateRunning,
StartedAt: time.Now(),
},
}
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
err := checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: "healthiness",
Status: "pending",
Output: "nomad: waiting to run",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
})
must.NoError(t, err)
consul := regmock.NewServiceRegistrationHandler(logger)
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
go func() {
// wait a bit then update the check to passing
time.Sleep(15 * time.Millisecond)
must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: "healthiness",
Status: "success",
Output: "nomad: http ok",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
}))
}()
select {
case <-time.After(4 * checkInterval):
t.Fatalf("timed out while waiting for success")
case healthy := <-tracker.HealthyCh():
must.True(t, healthy)
}
}
func TestTracker_NomadChecks_Unhealthy(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"
logger := testlog.HCLogger(t)
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Synthesize running alloc and tasks
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.TaskStates = map[string]*structs.TaskState{
alloc.Job.TaskGroups[0].Tasks[0].Name: {
State: structs.TaskStateRunning,
StartedAt: time.Now(),
},
}
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
err := checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: "healthiness",
Status: "pending", // start out pending
Output: "nomad: waiting to run",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
})
must.NoError(t, err)
consul := regmock.NewServiceRegistrationHandler(logger)
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
go func() {
// wait a bit then update the check to failing
time.Sleep(15 * time.Millisecond)
must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: "healthiness",
Status: "failing",
Output: "connection refused",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
}))
}()
// make sure we are always unhealthy across 4 check intervals
for i := 0; i < 4; i++ {
<-time.After(checkInterval)
select {
case <-tracker.HealthyCh():
t.Fatalf("should not receive on healthy chan with failing check")
default:
}
}
}
func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
ci.Parallel(t)
@ -112,13 +252,13 @@ func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -153,13 +293,13 @@ func TestTracker_Succeeded_PostStart_Healthy(t *testing.T) {
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, alloc.Job.TaskGroups[0].Migrate.MinHealthyTime, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -171,7 +311,7 @@ func TestTracker_Succeeded_PostStart_Healthy(t *testing.T) {
}
}
func TestTracker_Checks_Unhealthy(t *testing.T) {
func TestTracker_ConsulChecks_Unhealthy(t *testing.T) {
ci.Parallel(t)
alloc := mock.Alloc()
@ -220,7 +360,7 @@ func TestTracker_Checks_Unhealthy(t *testing.T) {
// Don't reply on the first call
var called uint64
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
if atomic.AddUint64(&called, 1) == 1 {
return nil, nil
@ -236,9 +376,9 @@ func TestTracker_Checks_Unhealthy(t *testing.T) {
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -249,9 +389,9 @@ func TestTracker_Checks_Unhealthy(t *testing.T) {
require.NoError(t, err)
})
tracker.l.Lock()
tracker.lock.Lock()
require.False(t, tracker.checksHealthy)
tracker.l.Unlock()
tracker.lock.Unlock()
select {
case v := <-tracker.HealthyCh():
@ -270,8 +410,7 @@ func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
tracker := NewTracker(ctx, logger, alloc, nil, nil,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, nil, nil, nil, time.Millisecond, true)
assertNoHealth := func() {
require.NoError(t, tracker.ctx.Err())
@ -362,7 +501,7 @@ func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
// Don't reply on the first call
var called uint64
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
if atomic.AddUint64(&called, 1) == 1 {
return nil, nil
@ -378,9 +517,9 @@ func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -419,7 +558,7 @@ func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
}
func TestTracker_Checks_OnUpdate(t *testing.T) {
func TestTracker_ConsulChecks_OnUpdate(t *testing.T) {
ci.Parallel(t)
cases := []struct {
@ -504,7 +643,7 @@ func TestTracker_Checks_OnUpdate(t *testing.T) {
// Don't reply on the first call
var called uint64
consul := regMock.NewServiceRegistrationHandler(logger)
consul := regmock.NewServiceRegistrationHandler(logger)
consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
if atomic.AddUint64(&called, 1) == 1 {
return nil, nil
@ -520,9 +659,9 @@ func TestTracker_Checks_OnUpdate(t *testing.T) {
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()
@ -548,3 +687,120 @@ func TestTracker_Checks_OnUpdate(t *testing.T) {
})
}
}
func TestTracker_NomadChecks_OnUpdate(t *testing.T) {
ci.Parallel(t)
cases := []struct {
name string
checkMode structs.CheckMode
checkResult structs.CheckStatus
expectedPass bool
}{
{
name: "mode is healthiness and check is healthy",
checkMode: structs.Healthiness,
checkResult: structs.CheckSuccess,
expectedPass: true,
},
{
name: "mode is healthiness and check is unhealthy",
checkMode: structs.Healthiness,
checkResult: structs.CheckFailure,
expectedPass: false,
},
{
name: "mode is readiness and check is healthy",
checkMode: structs.Readiness,
checkResult: structs.CheckSuccess,
expectedPass: true,
},
{
name: "mode is readiness and check is unhealthy",
checkMode: structs.Readiness,
checkResult: structs.CheckFailure,
expectedPass: true,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
alloc := mock.Alloc()
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"
logger := testlog.HCLogger(t)
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()
// Synthesize running alloc and tasks
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.TaskStates = map[string]*structs.TaskState{
alloc.Job.TaskGroups[0].Tasks[0].Name: {
State: structs.TaskStateRunning,
StartedAt: time.Now(),
},
}
// Set a check that is pending
checks := checkstore.NewStore(logger, state.NewMemDB(logger))
err := checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: tc.checkMode,
Status: structs.CheckPending,
Output: "nomad: waiting to run",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
})
must.NoError(t, err)
go func() {
// wait a bit then update the check to passing
time.Sleep(15 * time.Millisecond)
must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
ID: "abc123",
Mode: tc.checkMode,
Status: tc.checkResult,
Output: "some output",
Timestamp: time.Now().Unix(),
Group: alloc.TaskGroup,
Task: alloc.Job.TaskGroups[0].Tasks[0].Name,
Service: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
Check: alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
}))
}()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
consul := regmock.NewServiceRegistrationHandler(logger)
minHealthyTime := 1 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, minHealthyTime, true)
tracker.checkLookupInterval = 10 * time.Millisecond
tracker.Start()
select {
case <-time.After(8 * tracker.checkLookupInterval):
if !tc.expectedPass {
// tracker should still be running
must.NoError(t, tracker.ctx.Err())
return
}
t.Fatal("timed out while waiting for health")
case h := <-tracker.HealthyCh():
require.True(t, h)
}
// For healthy checks, the tracker should stop watching
select {
case <-tracker.ctx.Done():
// Ok, tracker should exit after reporting healthy
default:
t.Fatal("expected tracker to exit after reporting healthy")
}
})
}
}

View File

@ -22,6 +22,7 @@ import (
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
cstate "github.com/hashicorp/nomad/client/state"
cstructs "github.com/hashicorp/nomad/client/structs"
@ -41,6 +42,7 @@ type allocRunner struct {
// Logger is the logger for the alloc runner.
logger log.Logger
// clientConfig is the client configuration block.
clientConfig *config.Config
// stateUpdater is used to emit updated alloc state
@ -183,6 +185,9 @@ type allocRunner struct {
// to perform service and check registration and deregistration.
serviceRegWrapper *wrapper.HandlerWrapper
// checkStore contains check status information
checkStore checkstore.Shim
// getter is an interface for retrieving artifacts.
getter cinterfaces.ArtifactGetter
}
@ -229,6 +234,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) {
serversContactedCh: config.ServersContactedCh,
rpcClient: config.RPCClient,
serviceRegWrapper: config.ServiceRegWrapper,
checkStore: config.CheckStore,
getter: config.Getter,
}

View File

@ -149,21 +149,22 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
newCgroupHook(ar.Alloc(), ar.cpusetManager),
newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher),
newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir),
newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient),
newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient, ar.checkStore),
newNetworkHook(hookLogger, ns, alloc, nm, nc, ar, builtTaskEnv),
newGroupServiceHook(groupServiceHookConfig{
alloc: alloc,
namespace: alloc.ServiceProviderNamespace(),
serviceRegWrapper: ar.serviceRegWrapper,
restarter: ar,
taskEnvBuilder: envBuilder,
networkStatusGetter: ar,
logger: hookLogger,
shutdownDelayCtx: ar.shutdownDelayCtx,
alloc: alloc,
namespace: alloc.ServiceProviderNamespace(),
serviceRegWrapper: ar.serviceRegWrapper,
restarter: ar,
taskEnvBuilder: envBuilder,
networkStatus: ar,
logger: hookLogger,
shutdownDelayCtx: ar.shutdownDelayCtx,
}),
newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID),
newChecksHook(hookLogger, alloc, ar.checkStore, ar),
}
return nil

View File

@ -1076,10 +1076,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
// Assert that we have an event explaining why we are unhealthy.
require.Len(t, lastUpdate.TaskStates, 1)
state := lastUpdate.TaskStates[task.Name]
require.NotNil(t, state)
require.NotEmpty(t, state.Events)
last := state.Events[len(state.Events)-1]
taskState := lastUpdate.TaskStates[task.Name]
require.NotNil(t, taskState)
require.NotEmpty(t, taskState.Events)
last := taskState.Events[len(taskState.Events)-1]
require.Equal(t, allochealth.AllocHealthEventSource, last.Type)
require.Contains(t, last.Message, "by healthy_deadline")
}

View File

@ -0,0 +1,270 @@
package allocrunner
import (
"context"
"sync"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// checksHookName is the name of this hook as it appears in logs
checksHookName = "checks_hook"
)
// observers maintains a map from check_id -> observer for a particular check. Each
// observer in the map must share the same context.
type observers map[structs.CheckID]*observer
// An observer is used to execute a particular check on its interval and update the
// check store with those results.
type observer struct {
ctx context.Context
cancel context.CancelFunc
checker checks.Checker
checkStore checkstore.Shim
qc *checks.QueryContext
check *structs.ServiceCheck
allocID string
}
// start checking our check on its interval
func (o *observer) start() {
// compromise between immediate (too early) and waiting full interval (slow)
firstWait := o.check.Interval / 2
timer, cancel := helper.NewSafeTimer(firstWait)
defer cancel()
for {
select {
// exit the observer
case <-o.ctx.Done():
return
// time to execute the check
case <-timer.C:
query := checks.GetCheckQuery(o.check)
result := o.checker.Do(o.ctx, o.qc, query)
// and put the results into the store
_ = o.checkStore.Set(o.allocID, result)
// setup timer for next interval
timer.Reset(o.check.Interval)
}
}
}
// stop checking our check - this will also interrupt an in-progress execution
func (o *observer) stop() {
o.cancel()
}
// checksHook manages checks of Nomad service registrations, at both the group and
// task level, by storing / removing them from the Client state store.
//
// Does not manage Consul service checks; see groupServiceHook instead.
type checksHook struct {
logger hclog.Logger
network structs.NetworkStatus
shim checkstore.Shim
checker checks.Checker
allocID string
// fields that get re-initialized on allocation update
lock sync.RWMutex
ctx context.Context
stop func()
observers observers
alloc *structs.Allocation
}
func newChecksHook(
logger hclog.Logger,
alloc *structs.Allocation,
shim checkstore.Shim,
network structs.NetworkStatus,
) *checksHook {
h := &checksHook{
logger: logger.Named(checksHookName),
allocID: alloc.ID,
alloc: alloc,
shim: shim,
network: network,
checker: checks.New(logger),
}
h.initialize(alloc)
return h
}
// initialize the dynamic fields of checksHook, which is to say set up all the
// observers and query contexts associated with the alloc.
//
// Should be called during initial setup only.
func (h *checksHook) initialize(alloc *structs.Allocation) {
h.lock.Lock()
defer h.lock.Unlock()
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
if tg == nil {
return
}
// fresh context and stop function for this allocation
h.ctx, h.stop = context.WithCancel(context.Background())
// fresh set of observers
h.observers = make(observers)
// set the initial alloc
h.alloc = alloc
}
// observe will create the observer for each service in services.
// services must use only the nomad service provider.
//
// Caller must hold h.lock.
func (h *checksHook) observe(alloc *structs.Allocation, services []*structs.Service) {
var ports structs.AllocatedPorts
var networks structs.Networks
if alloc.AllocatedResources != nil {
ports = alloc.AllocatedResources.Shared.Ports
networks = alloc.AllocatedResources.Shared.Networks
}
for _, service := range services {
for _, check := range service.Checks {
// remember the initialization time
now := time.Now().UTC().Unix()
// create the deterministic check id for this check
id := structs.NomadCheckID(alloc.ID, alloc.TaskGroup, check)
// an observer for this check already exists
if _, exists := h.observers[id]; exists {
continue
}
ctx, cancel := context.WithCancel(h.ctx)
// create the observer for this check
h.observers[id] = &observer{
ctx: ctx,
cancel: cancel,
check: check.Copy(),
checkStore: h.shim,
checker: h.checker,
allocID: h.allocID,
qc: &checks.QueryContext{
ID: id,
CustomAddress: service.Address,
ServicePortLabel: service.PortLabel,
Ports: ports,
Networks: networks,
NetworkStatus: h.network,
Group: alloc.Name,
Task: service.TaskName,
Service: service.Name,
Check: check.Name,
},
}
// insert a pending result into state store for each check
result := checks.Stub(id, structs.GetCheckMode(check), now, alloc.Name, service.TaskName, service.Name, check.Name)
if err := h.shim.Set(h.allocID, result); err != nil {
h.logger.Error("failed to set initial check status", "id", h.allocID, "error", err)
continue
}
// start the observer
go h.observers[id].start()
}
}
}
func (h *checksHook) Name() string {
return checksHookName
}
func (h *checksHook) Prerun() error {
h.lock.Lock()
defer h.lock.Unlock()
group := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
if group == nil {
return nil
}
// create and start observers of nomad service checks in alloc
h.observe(h.alloc, group.NomadServices())
return nil
}
func (h *checksHook) Update(request *interfaces.RunnerUpdateRequest) error {
h.lock.Lock()
defer h.lock.Unlock()
group := request.Alloc.Job.LookupTaskGroup(request.Alloc.TaskGroup)
if group == nil {
return nil
}
// get all group and task level services using nomad provider
services := group.NomadServices()
// build the updated set of check IDs
next := make([]structs.CheckID, 0, len(h.observers))
for _, service := range services {
for _, check := range service.Checks {
next = append(next, structs.NomadCheckID(
request.Alloc.ID,
request.Alloc.TaskGroup,
check,
))
}
}
// stop the observers of the checks we are removing
remove := h.shim.Difference(request.Alloc.ID, next)
for _, id := range remove {
h.observers[id].stop()
delete(h.observers, id)
}
// remove checks that are no longer part of the allocation
if err := h.shim.Remove(request.Alloc.ID, remove); err != nil {
return err
}
// remember this new alloc
h.alloc = request.Alloc
// ensure we are observing new checks (idempotent)
h.observe(request.Alloc, services)
return nil
}
func (h *checksHook) PreKill() {
h.lock.Lock()
defer h.lock.Unlock()
// terminate our hook context, which threads down to all observers
h.stop()
// purge all checks for this allocation from the client state store
if err := h.shim.Purge(h.allocID); err != nil {
h.logger.Error("failed to purge check results", "alloc_id", h.allocID, "error", err)
}
}

View File

@ -0,0 +1,312 @@
package allocrunner
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
)
var (
_ interfaces.RunnerPrerunHook = (*checksHook)(nil)
_ interfaces.RunnerUpdateHook = (*checksHook)(nil)
_ interfaces.RunnerPreKillHook = (*checksHook)(nil)
)
func makeCheckStore(logger hclog.Logger) checkstore.Shim {
db := state.NewMemDB(logger)
checkStore := checkstore.NewStore(logger, db)
return checkStore
}
func allocWithNomadChecks(addr, port string, onGroup bool) *structs.Allocation {
alloc := mock.Alloc()
group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
task := "task-one"
if onGroup {
task = ""
}
services := []*structs.Service{
{
Name: "service-one",
TaskName: "web",
PortLabel: port,
AddressMode: "auto",
Address: addr,
Provider: "nomad",
Checks: []*structs.ServiceCheck{
{
Name: "check-ok",
Type: "http",
Path: "/",
Protocol: "http",
PortLabel: port,
AddressMode: "auto",
Interval: 250 * time.Millisecond,
Timeout: 1 * time.Second,
Method: "GET",
TaskName: task,
},
{
Name: "check-error",
Type: "http",
Path: "/fail",
Protocol: "http",
PortLabel: port,
AddressMode: "auto",
Interval: 250 * time.Millisecond,
Timeout: 1 * time.Second,
Method: "GET",
TaskName: task,
},
{
Name: "check-hang",
Type: "http",
Path: "/hang",
Protocol: "http",
PortLabel: port,
AddressMode: "auto",
Interval: 250 * time.Millisecond,
Timeout: 500 * time.Millisecond,
Method: "GET",
TaskName: task,
},
},
},
}
switch onGroup {
case true:
group.Tasks[0].Services = nil
group.Services = services
case false:
group.Services = nil
group.Tasks[0].Services = services
}
return alloc
}
func allocWithDifferentNomadChecks(id, addr, port string) *structs.Allocation {
alloc := allocWithNomadChecks(addr, port, true)
alloc.ID = id
group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
group.Services[0].Checks[2].Path = "/" // the hanging check is now ok
// append 4th check, this one is failing
group.Services[0].Checks = append(group.Services[0].Checks, &structs.ServiceCheck{
Name: "check-error-2",
Type: "http",
Path: "/fail",
Protocol: "http",
PortLabel: port,
AddressMode: "auto",
Interval: 250 * time.Millisecond,
Timeout: 1 * time.Second,
Method: "GET",
})
return alloc
}
var checkHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/fail":
w.WriteHeader(500)
_, _ = io.WriteString(w, "500 problem")
case "/hang":
time.Sleep(2 * time.Second)
_, _ = io.WriteString(w, "too slow")
default:
w.WriteHeader(200)
_, _ = io.WriteString(w, "200 ok")
}
})
func TestCheckHook_Checks_ResultsSet(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
// create an http server with various responses
ts := httptest.NewServer(checkHandler)
defer ts.Close()
cases := []struct {
name string
onGroup bool
}{
{name: "group-level", onGroup: true},
{name: "task-level", onGroup: false},
}
for _, tc := range cases {
checkStore := makeCheckStore(logger)
// get the address and port for http server
tokens := strings.Split(ts.URL, ":")
addr, port := strings.TrimPrefix(tokens[1], "//"), tokens[2]
network := mock.NewNetworkStatus(addr)
alloc := allocWithNomadChecks(addr, port, tc.onGroup)
h := newChecksHook(logger, alloc, checkStore, network)
// initialize has been called by the constructor; no observers exist yet
must.MapEmpty(t, h.observers)
// calling pre-run starts the observers
err := h.Prerun()
must.NoError(t, err)
testutil.WaitForResultUntil(
2*time.Second,
func() (bool, error) {
results := checkStore.List(alloc.ID)
passing, failing, pending := 0, 0, 0
for _, result := range results {
switch result.Status {
case structs.CheckSuccess:
passing++
case structs.CheckFailure:
failing++
case structs.CheckPending:
pending++
}
}
if passing != 1 || failing != 2 || pending != 0 {
fmt.Printf("results %v\n", results)
return false, fmt.Errorf(
"expected 1 passing, 2 failing, 0 pending, got %d passing, %d failing, %d pending",
passing, failing, pending,
)
}
return true, nil
},
func(err error) {
t.Fatal(err.Error())
},
)
h.PreKill() // stop observers, cleanup
// assert shim no longer contains results for the alloc
results := checkStore.List(alloc.ID)
must.MapEmpty(t, results)
}
}
func TestCheckHook_Checks_UpdateSet(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
// create an http server with various responses
ts := httptest.NewServer(checkHandler)
defer ts.Close()
// get the address and port for http server
tokens := strings.Split(ts.URL, ":")
addr, port := strings.TrimPrefix(tokens[1], "//"), tokens[2]
shim := makeCheckStore(logger)
network := mock.NewNetworkStatus(addr)
alloc := allocWithNomadChecks(addr, port, true)
h := newChecksHook(logger, alloc, shim, network)
// calling pre-run starts the observers
err := h.Prerun()
must.NoError(t, err)
// initial set of checks
testutil.WaitForResultUntil(
2*time.Second,
func() (bool, error) {
results := shim.List(alloc.ID)
passing, failing, pending := 0, 0, 0
for _, result := range results {
switch result.Status {
case structs.CheckSuccess:
passing++
case structs.CheckFailure:
failing++
case structs.CheckPending:
pending++
}
}
if passing != 1 || failing != 2 || pending != 0 {
fmt.Printf("results %v\n", results)
return false, fmt.Errorf(
"(initial set) expected 1 passing, 2 failing, 0 pending, got %d passing, %d failing, %d pending",
passing, failing, pending,
)
}
return true, nil
},
func(err error) {
t.Fatal(err.Error())
},
)
request := &interfaces.RunnerUpdateRequest{
Alloc: allocWithDifferentNomadChecks(alloc.ID, addr, port),
}
err = h.Update(request)
must.NoError(t, err)
// updated set of checks
testutil.WaitForResultUntil(
2*time.Second,
func() (bool, error) {
results := shim.List(alloc.ID)
passing, failing, pending := 0, 0, 0
for _, result := range results {
switch result.Status {
case structs.CheckSuccess:
passing++
case structs.CheckFailure:
failing++
case structs.CheckPending:
pending++
}
}
if passing != 2 || failing != 2 || pending != 0 {
fmt.Printf("results %v\n", results)
return false, fmt.Errorf(
"(updated set) expected 2 passing, 2 failing, 0 pending, got %d passing, %d failing, %d pending",
passing, failing, pending,
)
}
return true, nil
},
func(err error) {
t.Fatal(err.Error())
},
)
h.PreKill() // stop observers, cleanup
// assert shim no longer contains results for the alloc
results := shim.List(alloc.ID)
must.MapEmpty(t, results)
}

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
cstate "github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/client/vaultclient"
@ -87,6 +88,9 @@ type Config struct {
// to perform service and check registration and deregistration.
ServiceRegWrapper *wrapper.HandlerWrapper
// CheckStore contains check result information.
CheckStore checkstore.Shim
// Getter is an interface for retrieving artifacts.
Getter interfaces.ArtifactGetter
}

View File

@ -11,6 +11,7 @@ import (
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
"github.com/hashicorp/nomad/client/taskenv"
agentconsul "github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -18,21 +19,17 @@ const (
groupServiceHookName = "group_services"
)
type networkStatusGetter interface {
NetworkStatus() *structs.AllocNetworkStatus
}
// groupServiceHook manages task group Consul service registration and
// deregistration.
type groupServiceHook struct {
allocID string
jobID string
group string
restarter agentconsul.WorkloadRestarter
prerun bool
deregistered bool
networkStatusGetter networkStatusGetter
shutdownDelayCtx context.Context
allocID string
jobID string
group string
restarter agentconsul.WorkloadRestarter
prerun bool
deregistered bool
networkStatus structs.NetworkStatus
shutdownDelayCtx context.Context
// namespace is the Nomad or Consul namespace in which service
// registrations will be made. This field may be updated.
@ -58,12 +55,12 @@ type groupServiceHook struct {
}
type groupServiceHookConfig struct {
alloc *structs.Allocation
restarter agentconsul.WorkloadRestarter
taskEnvBuilder *taskenv.Builder
networkStatusGetter networkStatusGetter
shutdownDelayCtx context.Context
logger log.Logger
alloc *structs.Allocation
restarter agentconsul.WorkloadRestarter
taskEnvBuilder *taskenv.Builder
networkStatus structs.NetworkStatus
shutdownDelayCtx context.Context
logger log.Logger
// namespace is the Nomad or Consul namespace in which service
// registrations will be made.
@ -83,18 +80,18 @@ func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
}
h := &groupServiceHook{
allocID: cfg.alloc.ID,
jobID: cfg.alloc.JobID,
group: cfg.alloc.TaskGroup,
restarter: cfg.restarter,
namespace: cfg.namespace,
taskEnvBuilder: cfg.taskEnvBuilder,
delay: shutdownDelay,
networkStatusGetter: cfg.networkStatusGetter,
logger: cfg.logger.Named(groupServiceHookName),
serviceRegWrapper: cfg.serviceRegWrapper,
services: tg.Services,
shutdownDelayCtx: cfg.shutdownDelayCtx,
allocID: cfg.alloc.ID,
jobID: cfg.alloc.JobID,
group: cfg.alloc.TaskGroup,
restarter: cfg.restarter,
namespace: cfg.namespace,
taskEnvBuilder: cfg.taskEnvBuilder,
delay: shutdownDelay,
networkStatus: cfg.networkStatus,
logger: cfg.logger.Named(groupServiceHookName),
serviceRegWrapper: cfg.serviceRegWrapper,
services: tg.Services,
shutdownDelayCtx: cfg.shutdownDelayCtx,
}
if cfg.alloc.AllocatedResources != nil {
@ -210,10 +207,13 @@ func (h *groupServiceHook) preKillLocked() {
h.logger.Debug("delay before killing tasks", "group", h.group, "shutdown_delay", h.delay)
timer, cancel := helper.NewSafeTimer(h.delay)
defer cancel()
select {
// Wait for specified shutdown_delay unless ignored
// This will block an agent from shutting down.
case <-time.After(h.delay):
case <-timer.C:
case <-h.shutdownDelayCtx.Done():
}
}
@ -241,8 +241,8 @@ func (h *groupServiceHook) getWorkloadServices() *serviceregistration.WorkloadSe
interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services)
var netStatus *structs.AllocNetworkStatus
if h.networkStatusGetter != nil {
netStatus = h.networkStatusGetter.NetworkStatus()
if h.networkStatus != nil {
netStatus = h.networkStatus.NetworkStatus()
}
// Create task services struct with request's driver metadata

View File

@ -6,35 +6,39 @@ import (
"sync"
"time"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allochealth"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// healthMutator is able to set/clear alloc health.
// healthSetter is able to set/clear alloc health.
type healthSetter interface {
// HasHealth returns true if health is already set.
HasHealth() bool
// Set health via the mutator
// SetHealth via the mutator.
SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent)
// Clear health when the deployment ID changes
// ClearHealth for when the deployment ID changes.
ClearHealth()
}
// allocHealthWatcherHook is responsible for watching an allocation's task
// status and (optionally) Consul health check status to determine if the
// allocation is health or unhealthy. Used by deployments and migrations.
// allocation is healthy or unhealthy. Used by deployments and migrations.
type allocHealthWatcherHook struct {
healthSetter healthSetter
// consul client used to monitor health checks
// consul client used to monitor Consul service health checks
consul serviceregistration.Handler
// checkStore is used to monitor Nomad service health checks
checkStore checkstore.Shim
// listener is given to trackers to listen for alloc updates and closed
// when the alloc is destroyed.
listener *cstructs.AllocListener
@ -64,11 +68,11 @@ type allocHealthWatcherHook struct {
// hold hookLock to access.
isDeploy bool
logger log.Logger
logger hclog.Logger
}
func newAllocHealthWatcherHook(logger log.Logger, alloc *structs.Allocation, hs healthSetter,
listener *cstructs.AllocListener, consul serviceregistration.Handler) interfaces.RunnerHook {
func newAllocHealthWatcherHook(logger hclog.Logger, alloc *structs.Allocation, hs healthSetter,
listener *cstructs.AllocListener, consul serviceregistration.Handler, checkStore checkstore.Shim) interfaces.RunnerHook {
// Neither deployments nor migrations care about the health of
// non-service jobs so never watch their health
@ -85,6 +89,7 @@ func newAllocHealthWatcherHook(logger log.Logger, alloc *structs.Allocation, hs
cancelFn: func() {}, // initialize to prevent nil func panics
watchDone: closedDone,
consul: consul,
checkStore: checkStore,
healthSetter: hs,
listener: listener,
}
@ -132,8 +137,9 @@ func (h *allocHealthWatcherHook) init() error {
h.logger.Trace("watching", "deadline", deadline, "checks", useChecks, "min_healthy_time", minHealthyTime)
// Create a new tracker, start it, and watch for health results.
tracker := allochealth.NewTracker(ctx, h.logger, h.alloc,
h.listener, h.consul, minHealthyTime, useChecks)
tracker := allochealth.NewTracker(
ctx, h.logger, h.alloc, h.listener, h.consul, h.checkStore, minHealthyTime, useChecks,
)
tracker.Start()
// Create a new done chan and start watching for health updates
@ -194,7 +200,7 @@ func (h *allocHealthWatcherHook) Postrun() error {
func (h *allocHealthWatcherHook) Shutdown() {
// Same as Postrun
h.Postrun()
_ = h.Postrun()
}
// watchHealth watches alloc health until it is set, the alloc is stopped, the

View File

@ -96,7 +96,8 @@ func TestHealthHook_PrerunPostrun(t *testing.T) {
consul := regMock.NewServiceRegistrationHandler(logger)
hs := &mockHealthSetter{}
h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks)
// Assert we implemented the right interfaces
prerunh, ok := h.(interfaces.RunnerPrerunHook)
@ -134,7 +135,8 @@ func TestHealthHook_PrerunUpdatePostrun(t *testing.T) {
consul := regMock.NewServiceRegistrationHandler(logger)
hs := &mockHealthSetter{}
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
// Prerun
require.NoError(h.Prerun())
@ -173,7 +175,8 @@ func TestHealthHook_UpdatePrerunPostrun(t *testing.T) {
consul := regMock.NewServiceRegistrationHandler(logger)
hs := &mockHealthSetter{}
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
// Set a DeploymentID to cause ClearHealth to be called
alloc.DeploymentID = uuid.Generate()
@ -214,7 +217,8 @@ func TestHealthHook_Postrun(t *testing.T) {
consul := regMock.NewServiceRegistrationHandler(logger)
hs := &mockHealthSetter{}
h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
// Postrun
require.NoError(h.Postrun())
@ -280,7 +284,8 @@ func TestHealthHook_SetHealth_healthy(t *testing.T) {
hs := newMockHealthSetter()
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
// Prerun
require.NoError(h.Prerun())
@ -368,7 +373,8 @@ func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
hs := newMockHealthSetter()
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
checks := new(mock.CheckShim)
h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
// Prerun
require.NoError(h.Prerun())
@ -389,7 +395,7 @@ func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
func TestHealthHook_SystemNoop(t *testing.T) {
ci.Parallel(t)
h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil)
h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil, nil)
// Assert that it's the noop impl
_, ok := h.(noopAllocHealthWatcherHook)
@ -410,7 +416,7 @@ func TestHealthHook_SystemNoop(t *testing.T) {
func TestHealthHook_BatchNoop(t *testing.T) {
ci.Parallel(t)
h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil)
h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil, nil)
// Assert that it's the noop impl
_, ok := h.(noopAllocHealthWatcherHook)

View File

@ -4,19 +4,19 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// RunnnerHook is a lifecycle hook into the life cycle of an allocation runner.
// RunnerHook is a lifecycle hook into the life cycle of an allocation runner.
type RunnerHook interface {
Name() string
}
// RunnerPrerunHooks are executed before calling TaskRunner.Run for
// A RunnerPrerunHook is executed before calling TaskRunner.Run for
// non-terminal allocations. Terminal allocations do *not* call prerun.
type RunnerPrerunHook interface {
RunnerHook
Prerun() error
}
// RunnerPreKillHooks are executed inside of KillTasks before
// A RunnerPreKillHook is executed inside of KillTasks before
// iterating and killing each task. It will run before the Leader
// task is killed.
type RunnerPreKillHook interface {
@ -25,7 +25,7 @@ type RunnerPreKillHook interface {
PreKill()
}
// RunnerPostrunHooks are executed after calling TaskRunner.Run, even for
// A RunnerPostrunHook is executed after calling TaskRunner.Run, even for
// terminal allocations. Therefore Postrun hooks must be safe to call without
// first calling Prerun hooks.
type RunnerPostrunHook interface {
@ -33,7 +33,7 @@ type RunnerPostrunHook interface {
Postrun() error
}
// RunnerDestroyHooks are executed after AllocRunner.Run has exited and must
// A RunnerDestroyHook is executed after AllocRunner.Run has exited and must
// make a best effort cleanup allocation resources. Destroy hooks must be safe
// to call without first calling Prerun.
type RunnerDestroyHook interface {
@ -41,10 +41,10 @@ type RunnerDestroyHook interface {
Destroy() error
}
// RunnerUpdateHooks are executed when an allocation update is received from
// A RunnerUpdateHook is executed when an allocation update is received from
// the server. Update is called concurrently with AllocRunner execution and
// therefore must be safe for concurrent access with other hook methods. Calls
// to Update are serialized so allocaiton updates will always be processed in
// to Update are serialized so allocation updates will always be processed in
// order.
type RunnerUpdateHook interface {
RunnerHook
@ -55,7 +55,7 @@ type RunnerUpdateRequest struct {
Alloc *structs.Allocation
}
// RunnerTaskRestartHooks are executed just before the allocation runner is
// A RunnerTaskRestartHook is executed just before the allocation runner is
// going to restart all tasks.
type RunnerTaskRestartHook interface {
RunnerHook

View File

@ -14,6 +14,7 @@ import (
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/serviceregistration/mock"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
"github.com/hashicorp/nomad/client/state"
@ -67,12 +68,14 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*Config, fu
consulRegMock := mock.NewServiceRegistrationHandler(clientConf.Logger)
nomadRegMock := mock.NewServiceRegistrationHandler(clientConf.Logger)
stateDB := new(state.NoopDB)
conf := &Config{
// Copy the alloc in case the caller edits and reuses it
Alloc: alloc.Copy(),
Logger: clientConf.Logger,
ClientConfig: clientConf,
StateDB: state.NoopDB{},
StateDB: stateDB,
Consul: consulRegMock,
ConsulSI: consul.NewMockServiceIdentitiesClient(),
Vault: vaultclient.NewMockVaultClient(),
@ -84,6 +87,7 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*Config, fu
CpusetManager: new(cgutil.NoopCpusetManager),
ServersContactedCh: make(chan struct{}),
ServiceRegWrapper: wrapper.NewHandlerWrapper(clientConf.Logger, consulRegMock, nomadRegMock),
CheckStore: checkstore.NewStore(clientConf.Logger, stateDB),
Getter: getter.TestDefaultGetter(t),
}

View File

@ -1 +0,0 @@
package allocrunner

View File

@ -37,6 +37,7 @@ import (
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
"github.com/hashicorp/nomad/client/servers"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/serviceregistration/nsd"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
"github.com/hashicorp/nomad/client/state"
@ -166,7 +167,7 @@ type AllocRunner interface {
}
// Client is used to implement the client interaction with Nomad. Clients
// are expected to register as a schedulable node to the servers, and to
// are expected to register as a schedule-able node to the servers, and to
// run allocations as determined by the servers.
type Client struct {
config *config.Config
@ -237,6 +238,10 @@ type Client struct {
// registrations.
nomadService serviceregistration.Handler
// checkStore is used to store group and task checks and their current pass/fail
// status.
checkStore checkstore.Shim
// serviceRegWrapper wraps the consulService and nomadService
// implementations so that the alloc and task runner service hooks can call
// this without needing to identify which backend provider should be used.
@ -693,6 +698,10 @@ func (c *Client) init() error {
c.cpusetManager = new(cgutil.NoopCpusetManager)
}
}
// setup the check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)
return nil
}
@ -1175,6 +1184,7 @@ func (c *Client) restoreState() error {
DriverManager: c.drivermanager,
ServersContactedCh: c.serversContactedCh,
ServiceRegWrapper: c.serviceRegWrapper,
CheckStore: c.checkStore,
RPCClient: c,
Getter: c.getter,
}
@ -2513,6 +2523,7 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error
DeviceManager: c.devicemanager,
DriverManager: c.drivermanager,
ServiceRegWrapper: c.serviceRegWrapper,
CheckStore: c.checkStore,
RPCClient: c,
Getter: c.getter,
}

View File

@ -0,0 +1,147 @@
package checkstore
import (
"sync"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
"golang.org/x/exp/slices"
)
// A Shim is used to track the latest check status information, one layer above
// the client persistent store so we can do efficient indexing, etc.
type Shim interface {
// Set the latest result for a specific check.
Set(allocID string, result *structs.CheckQueryResult) error
// List the latest results for a specific allocation.
List(allocID string) map[structs.CheckID]*structs.CheckQueryResult
// Difference returns the set of IDs being stored that are not in ids.
Difference(allocID string, ids []structs.CheckID) []structs.CheckID
// Remove will remove ids from the cache and persistent store.
Remove(allocID string, ids []structs.CheckID) error
// Purge results for a specific allocation.
Purge(allocID string) error
}
type shim struct {
log hclog.Logger
db state.StateDB
lock sync.RWMutex
current checks.ClientResults
}
// NewStore creates a new store.
func NewStore(log hclog.Logger, db state.StateDB) Shim {
s := &shim{
log: log.Named("check_store"),
db: db,
current: make(checks.ClientResults),
}
s.restore()
return s
}
func (s *shim) restore() {
s.lock.Lock()
defer s.lock.Unlock()
results, err := s.db.GetCheckResults()
if err != nil {
s.log.Error("failed to restore health check results", "error", err)
return
}
for id, m := range results {
s.current[id] = helper.CopyMap(m)
}
}
func (s *shim) Set(allocID string, qr *structs.CheckQueryResult) error {
s.lock.Lock()
defer s.lock.Unlock()
if _, exists := s.current[allocID]; !exists {
s.current[allocID] = make(map[structs.CheckID]*structs.CheckQueryResult)
}
// lookup existing result
previous, exists := s.current[allocID][qr.ID]
// only insert a stub if no result exists yet
if qr.Status == structs.CheckPending && exists {
return nil
}
s.log.Trace("setting check status", "alloc_id", allocID, "check_id", qr.ID, "status", qr.Status)
// always keep in-memory shim up to date with latest result
s.current[allocID][qr.ID] = qr
// only update persistent store if status changes (optimization)
// on Client restart restored check results may be outdated but the status
// is the same as the most recent result
if !exists || previous.Status != qr.Status {
return s.db.PutCheckResult(allocID, qr)
}
return nil
}
func (s *shim) List(allocID string) map[structs.CheckID]*structs.CheckQueryResult {
s.lock.RLock()
defer s.lock.RUnlock()
m, exists := s.current[allocID]
if !exists {
return nil
}
return helper.CopyMap(m)
}
func (s *shim) Purge(allocID string) error {
s.lock.Lock()
defer s.lock.Unlock()
// remove from our map
delete(s.current, allocID)
// remove from persistent store
return s.db.PurgeCheckResults(allocID)
}
func (s *shim) Remove(allocID string, ids []structs.CheckID) error {
s.lock.Lock()
defer s.lock.Unlock()
// remove from cache
for _, id := range ids {
delete(s.current[allocID], id)
}
// remove from persistent store
return s.db.DeleteCheckResults(allocID, ids)
}
func (s *shim) Difference(allocID string, ids []structs.CheckID) []structs.CheckID {
s.lock.Lock()
defer s.lock.Unlock()
var remove []structs.CheckID
for id := range s.current[allocID] {
if !slices.Contains(ids, id) {
remove = append(remove, id)
}
}
return remove
}
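
A minimal usage sketch of the Shim above, assuming the in-memory StateDB from the client state package and a null hclog logger; the allocation and check IDs are illustrative:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// back the shim with the in-memory StateDB; a real client wires in its
	// BoltStateDB so results survive restarts
	db := state.NewMemDB(hclog.NewNullLogger())
	store := checkstore.NewStore(hclog.NewNullLogger(), db)

	// record the latest result for a check belonging to "alloc1"
	_ = store.Set("alloc1", &structs.CheckQueryResult{
		ID:     "check1",
		Status: structs.CheckSuccess,
		Output: "nomad: http ok",
	})

	// read back everything known about the allocation
	for id, qr := range store.List("alloc1") {
		fmt.Println(id, qr.Status, qr.Output)
	}

	// drop results for checks no longer present in the job, then purge the
	// allocation entirely once it is garbage collected
	_ = store.Remove("alloc1", []structs.CheckID{"check1"})
	_ = store.Purge("alloc1")
}
```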

View File

@ -0,0 +1,379 @@
package checkstore
import (
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"golang.org/x/exp/slices"
)
var (
success = structs.CheckSuccess
failure = structs.CheckFailure
pending = structs.CheckPending
)
func newQR(id string, status structs.CheckStatus) *structs.CheckQueryResult {
return &structs.CheckQueryResult{
ID: structs.CheckID(id),
Status: status,
}
}
// alias for brevity
type qrMap = map[structs.CheckID]*structs.CheckQueryResult
func TestShim_New(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("restore empty", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
m := s.List("none")
must.MapEmpty(t, m)
})
t.Run("restore checks", func(t *testing.T) {
db := state.NewMemDB(logger)
must.NoError(t, db.PutCheckResult("alloc1", newQR("id1", success)))
must.NoError(t, db.PutCheckResult("alloc1", newQR("id2", failure)))
must.NoError(t, db.PutCheckResult("alloc2", newQR("id3", pending)))
s := NewStore(logger, db)
m1 := s.List("alloc1")
must.MapEq(t, qrMap{
"id1": newQR("id1", success),
"id2": newQR("id2", failure),
}, m1)
m2 := s.List("alloc2")
must.MapEq(t, qrMap{"id3": newQR("id3", pending)}, m2)
})
}
func TestShim_Set(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("insert pending", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert initial pending check into empty database
qr1 := newQR("id1", pending)
qr1.Timestamp = 1
must.NoError(t, s.Set("alloc1", qr1))
// ensure underlying db has check
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.Eq(t, checks.ClientResults{"alloc1": {"id1": qr1}}, internal)
})
t.Run("ignore followup pending", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert a check
qr1 := newQR("id1", success)
qr1.Timestamp = 1
must.NoError(t, s.Set("alloc1", qr1))
// insert a followup pending check (e.g. client restart)
qr2 := newQR("id1", pending)
qr2.Timestamp = 2
t.Run("into existing", func(t *testing.T) {
must.NoError(t, s.Set("alloc1", qr2))
})
// ensure shim maintains success result
list := s.List("alloc1")
must.Eq(t, qrMap{"id1": qr1}, list)
// ensure underlying db maintains success result
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.Eq(t, checks.ClientResults{"alloc1": {"id1": qr1}}, internal)
})
t.Run("insert status change", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert initial check, success
qr1 := newQR("id1", success)
must.NoError(t, s.Set("alloc1", qr1))
// insert followup check, failure
qr2 := newQR("id1", failure)
must.NoError(t, s.Set("alloc1", qr2))
// ensure shim sees newest status result
list := s.List("alloc1")
must.Eq(t, qrMap{"id1": qr2}, list)
// ensure underlying db sees newest status result
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.Eq(t, checks.ClientResults{"alloc1": {"id1": qr2}}, internal)
})
t.Run("insert status same", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert initial check, success
qr1 := newQR("id1", success)
qr1.Timestamp = 1
must.NoError(t, s.Set("alloc1", qr1))
// insert followup check, also success
qr2 := newQR("id1", success)
qr2.Timestamp = 2
must.NoError(t, s.Set("alloc1", qr2))
// ensure shim sees newest status result
list := s.List("alloc1")
must.Eq(t, qrMap{"id1": qr2}, list)
// ensure underlying db sees stale result (optimization)
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.Eq(t, checks.ClientResults{"alloc1": {"id1": qr1}}, internal)
})
}
func TestShim_List(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("list empty", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
list := s.List("alloc1")
must.MapEmpty(t, list)
})
t.Run("list mix", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc2", newQR("id1", pending)))
list1 := s.List("alloc1")
must.MapEq(t, qrMap{
"id1": newQR("id1", success),
"id2": newQR("id2", failure),
}, list1)
list2 := s.List("alloc2")
must.MapEq(t, qrMap{
"id1": newQR("id1", pending),
}, list2)
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.MapEq(t, checks.ClientResults{
"alloc1": {
"id1": newQR("id1", success),
"id2": newQR("id2", failure),
},
"alloc2": {
"id1": newQR("id1", pending),
},
}, internal)
})
}
func TestShim_Difference(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("empty store", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
ids := []structs.CheckID{"id1", "id2", "id3"}
unwanted := s.Difference("alloc1", ids)
must.Empty(t, unwanted)
})
t.Run("empty unwanted", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc2", newQR("id1", pending)))
var ids []structs.CheckID
var exp = []structs.CheckID{"id1", "id2"}
unwanted := s.Difference("alloc1", ids)
slices.Sort(unwanted)
must.Eq(t, exp, unwanted)
})
t.Run("subset unwanted", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc1", newQR("id3", success)))
must.NoError(t, s.Set("alloc1", newQR("id4", success)))
must.NoError(t, s.Set("alloc1", newQR("id5", pending)))
ids := []structs.CheckID{"id1", "id3", "id4"}
exp := []structs.CheckID{"id2", "id5"}
unwanted := s.Difference("alloc1", ids)
slices.Sort(unwanted)
must.Eq(t, exp, unwanted)
})
t.Run("unexpected unwanted", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc1", newQR("id3", success)))
ids := []structs.CheckID{"id1", "id4"}
exp := []structs.CheckID{"id2", "id3"}
unwanted := s.Difference("alloc1", ids)
slices.Sort(unwanted)
must.Eq(t, exp, unwanted)
})
}
func TestShim_Remove(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("remove from empty store", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
ids := []structs.CheckID{"id1", "id2"}
err := s.Remove("alloc1", ids)
must.NoError(t, err)
})
t.Run("remove empty set from store", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc2", newQR("id1", pending)))
var ids []structs.CheckID
err := s.Remove("alloc1", ids)
must.NoError(t, err)
// ensure shim still contains checks
list := s.List("alloc1")
must.Eq(t, qrMap{"id1": newQR("id1", success), "id2": newQR("id2", failure)}, list)
// ensure underlying db still contains all checks
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.Eq(t, checks.ClientResults{
"alloc1": {
"id1": newQR("id1", success),
"id2": newQR("id2", failure),
},
"alloc2": {
"id1": newQR("id1", pending),
},
}, internal)
})
t.Run("remove subset from store", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc1", newQR("id3", success)))
must.NoError(t, s.Set("alloc1", newQR("id4", pending)))
must.NoError(t, s.Set("alloc2", newQR("id1", pending)))
must.NoError(t, s.Set("alloc2", newQR("id2", success)))
ids := []structs.CheckID{"id1", "id4"}
err := s.Remove("alloc1", ids)
must.NoError(t, err)
// ensure shim still contains remaining checks
list := s.List("alloc1")
must.Eq(t, qrMap{"id2": newQR("id2", failure), "id3": newQR("id3", success)}, list)
// ensure underlying db still contains remaining checks
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.MapEq(t, checks.ClientResults{
"alloc1": {
"id2": newQR("id2", failure),
"id3": newQR("id3", success),
},
"alloc2": {
"id1": newQR("id1", pending),
"id2": newQR("id2", success),
},
}, internal)
})
}
func TestShim_Purge(t *testing.T) {
ci.Parallel(t)
logger := testlog.HCLogger(t)
t.Run("purge from empty", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
err := s.Purge("alloc1")
must.NoError(t, err)
})
t.Run("purge one alloc", func(t *testing.T) {
db := state.NewMemDB(logger)
s := NewStore(logger, db)
// insert some checks
must.NoError(t, s.Set("alloc1", newQR("id1", success)))
must.NoError(t, s.Set("alloc1", newQR("id2", failure)))
must.NoError(t, s.Set("alloc2", newQR("id1", pending)))
err := s.Purge("alloc1")
must.NoError(t, err)
// ensure alloc1 is gone from shim
list1 := s.List("alloc1")
must.MapEmpty(t, list1)
// ensure alloc2 remains in shim
list2 := s.List("alloc2")
must.MapEq(t, qrMap{"id1": newQR("id1", pending)}, list2)
// ensure alloc is gone from underlying db
internal, err := db.GetCheckResults()
must.NoError(t, err)
must.MapEq(t, checks.ClientResults{
"alloc2": {"id1": newQR("id1", pending)},
}, internal)
})
}

View File

@ -0,0 +1,209 @@
package checks
import (
"bytes"
"context"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strconv"
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/nomad/structs"
"oss.indeed.com/go/libtime"
)
const (
// maxTimeoutHTTP is a fail-safe value for the HTTP client, ensuring a Nomad
// Client does not leak goroutines hanging on to unresponsive endpoints.
maxTimeoutHTTP = 10 * time.Minute
)
// Checker executes a check given an allocation-specific context, and produces
// a resulting structs.CheckQueryResult
type Checker interface {
Do(context.Context, *QueryContext, *Query) *structs.CheckQueryResult
}
// New creates a new Checker capable of executing HTTP and TCP checks.
func New(log hclog.Logger) Checker {
httpClient := cleanhttp.DefaultPooledClient()
httpClient.Timeout = maxTimeoutHTTP
return &checker{
log: log.Named("checks"),
httpClient: httpClient,
clock: libtime.SystemClock(),
}
}
type checker struct {
log hclog.Logger
clock libtime.Clock
httpClient *http.Client
}
func (c *checker) now() int64 {
return c.clock.Now().UTC().Unix()
}
// Do will execute the Query given the QueryContext and produce a structs.CheckQueryResult
func (c *checker) Do(ctx context.Context, qc *QueryContext, q *Query) *structs.CheckQueryResult {
var qr *structs.CheckQueryResult
timeout, cancel := context.WithTimeout(ctx, q.Timeout)
defer cancel()
switch q.Type {
case "http":
qr = c.checkHTTP(timeout, qc, q)
default:
qr = c.checkTCP(timeout, qc, q)
}
qr.ID = qc.ID
qr.Group = qc.Group
qr.Task = qc.Task
qr.Service = qc.Service
qr.Check = qc.Check
return qr
}
// resolve the address to use when executing Query given a QueryContext
func address(qc *QueryContext, q *Query) (string, error) {
mode := q.AddressMode
if mode == "" { // determine resolution for check address
if qc.CustomAddress != "" {
// if the service is using a custom address, enable the check to
// inherit that custom address
mode = structs.AddressModeAuto
} else {
// otherwise a check defaults to the host address
mode = structs.AddressModeHost
}
}
label := q.PortLabel
if label == "" {
label = qc.ServicePortLabel
}
status := qc.NetworkStatus.NetworkStatus()
addr, port, err := serviceregistration.GetAddress(
qc.CustomAddress, // custom address
mode, // check address mode
label, // port label
qc.Networks, // allocation networks
nil, // driver network (not supported)
qc.Ports, // ports
status, // allocation network status
)
if err != nil {
return "", err
}
if port > 0 {
addr = net.JoinHostPort(addr, strconv.Itoa(port))
}
return addr, nil
}
func (c *checker) checkTCP(ctx context.Context, qc *QueryContext, q *Query) *structs.CheckQueryResult {
qr := &structs.CheckQueryResult{
Mode: q.Mode,
Timestamp: c.now(),
Status: structs.CheckPending,
}
addr, err := address(qc, q)
if err != nil {
qr.Output = err.Error()
qr.Status = structs.CheckFailure
return qr
}
if _, err = new(net.Dialer).DialContext(ctx, "tcp", addr); err != nil {
qr.Output = err.Error()
qr.Status = structs.CheckFailure
return qr
}
qr.Output = "nomad: tcp ok"
qr.Status = structs.CheckSuccess
return qr
}
func (c *checker) checkHTTP(ctx context.Context, qc *QueryContext, q *Query) *structs.CheckQueryResult {
qr := &structs.CheckQueryResult{
Mode: q.Mode,
Timestamp: c.now(),
Status: structs.CheckPending,
}
addr, err := address(qc, q)
if err != nil {
qr.Output = err.Error()
qr.Status = structs.CheckFailure
return qr
}
u := (&url.URL{
Scheme: q.Protocol,
Host: addr,
Path: q.Path,
}).String()
request, err := http.NewRequest(q.Method, u, nil)
if err != nil {
qr.Output = fmt.Sprintf("nomad: %s", err.Error())
qr.Status = structs.CheckFailure
return qr
}
request = request.WithContext(ctx)
result, err := c.httpClient.Do(request)
if err != nil {
qr.Output = fmt.Sprintf("nomad: %s", err.Error())
qr.Status = structs.CheckFailure
return qr
}
// match the result status code to the http status code
qr.StatusCode = result.StatusCode
switch {
case result.StatusCode == 200:
qr.Status = structs.CheckSuccess
qr.Output = "nomad: http ok"
return qr
case result.StatusCode < 400:
qr.Status = structs.CheckSuccess
default:
qr.Status = structs.CheckFailure
}
// status code was not 200; read the response body and set that as the
// check result output content
qr.Output = limitRead(result.Body)
return qr
}
const (
// outputSizeLimit is the maximum number of bytes to read and store of an http
// check output. Set to 3kb which fits in 1 page with room for other fields.
outputSizeLimit = 3 * 1024
)
func limitRead(r io.Reader) string {
b := make([]byte, 0, outputSizeLimit)
output := bytes.NewBuffer(b)
limited := io.LimitReader(r, outputSizeLimit)
if _, err := io.Copy(output, limited); err != nil {
return fmt.Sprintf("nomad: %s", err.Error())
}
return output.String()
}
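
A minimal sketch of invoking the Checker directly, mirroring how the test below constructs its Query and QueryContext; the address, port, and path are illustrative, and the result simply reports a failure unless something is listening there:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/serviceregistration/checks"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	c := checks.New(hclog.NewNullLogger())

	// a Query is normally derived from the jobspec check via GetCheckQuery;
	// this one is written out by hand
	q := &checks.Query{
		Mode:        structs.Healthiness,
		Type:        "http",
		Timeout:     3 * time.Second,
		AddressMode: "auto",
		PortLabel:   "8080",
		Protocol:    "http",
		Path:        "/health",
		Method:      "GET",
	}

	// the QueryContext supplies the allocation-specific addressing details;
	// mock.NewNetworkStatus stands in for the allocation network status
	qc := &checks.QueryContext{
		ID:               "check-id",
		CustomAddress:    "127.0.0.1",
		ServicePortLabel: "8080",
		NetworkStatus:    mock.NewNetworkStatus("127.0.0.1"),
		Group:            "group",
		Service:          "service",
		Check:            "check",
	}

	result := c.Do(context.TODO(), qc, q)
	fmt.Println(result.Status, result.Output)
}
```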

View File

@ -0,0 +1,342 @@
package checks
import (
"context"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/freeport"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"oss.indeed.com/go/libtime/libtimetest"
)
func TestChecker_Do_HTTP(t *testing.T) {
ci.Parallel(t)
// an example response that will be truncated
tooLong, truncate := bigResponse()
// create an http server with various responses
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/fail":
w.WriteHeader(500)
_, _ = io.WriteString(w, "500 problem")
case "/hang":
time.Sleep(1 * time.Second)
_, _ = io.WriteString(w, "too slow")
case "/long-fail":
w.WriteHeader(500)
_, _ = io.WriteString(w, tooLong)
case "/long-not-fail":
w.WriteHeader(201)
_, _ = io.WriteString(w, tooLong)
default:
w.WriteHeader(200)
_, _ = io.WriteString(w, "200 ok")
}
}))
defer ts.Close()
// get the address and port for http server
tokens := strings.Split(ts.URL, ":")
addr, port := strings.TrimPrefix(tokens[1], "//"), tokens[2]
// create a mock clock so we can assert time is set
now := time.Date(2022, 1, 2, 3, 4, 5, 6, time.UTC)
clock := libtimetest.NewClockMock(t).NowMock.Return(now)
makeQueryContext := func() *QueryContext {
return &QueryContext{
ID: "abc123",
CustomAddress: addr,
ServicePortLabel: port,
Networks: nil,
NetworkStatus: mock.NewNetworkStatus(addr),
Ports: nil,
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
}
makeQuery := func(
kind structs.CheckMode,
path string,
) *Query {
return &Query{
Mode: kind,
Type: "http",
Timeout: 100 * time.Millisecond,
AddressMode: "auto",
PortLabel: port,
Protocol: "http",
Path: path,
Method: "GET",
}
}
makeExpResult := func(
kind structs.CheckMode,
status structs.CheckStatus,
code int,
output string,
) *structs.CheckQueryResult {
return &structs.CheckQueryResult{
ID: "abc123",
Mode: kind,
Status: status,
StatusCode: code,
Output: output,
Timestamp: now.Unix(),
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
}
cases := []struct {
name string
qc *QueryContext
q *Query
expResult *structs.CheckQueryResult
}{{
name: "200 healthiness",
qc: makeQueryContext(),
q: makeQuery(structs.Healthiness, "/"),
expResult: makeExpResult(
structs.Healthiness,
structs.CheckSuccess,
http.StatusOK,
"nomad: http ok",
),
}, {
name: "200 readiness",
qc: makeQueryContext(),
q: makeQuery(structs.Readiness, "/"),
expResult: makeExpResult(
structs.Readiness,
structs.CheckSuccess,
http.StatusOK,
"nomad: http ok",
),
}, {
name: "500 healthiness",
qc: makeQueryContext(),
q: makeQuery(structs.Healthiness, "fail"),
expResult: makeExpResult(
structs.Healthiness,
structs.CheckFailure,
http.StatusInternalServerError,
"500 problem",
),
}, {
name: "hang",
qc: makeQueryContext(),
q: makeQuery(structs.Healthiness, "hang"),
expResult: makeExpResult(
structs.Healthiness,
structs.CheckFailure,
0,
fmt.Sprintf(`nomad: Get "%s/hang": context deadline exceeded`, ts.URL),
),
}, {
name: "500 truncate",
qc: makeQueryContext(),
q: makeQuery(structs.Healthiness, "long-fail"),
expResult: makeExpResult(
structs.Healthiness,
structs.CheckFailure,
http.StatusInternalServerError,
truncate,
),
}, {
name: "201 truncate",
qc: makeQueryContext(),
q: makeQuery(structs.Healthiness, "long-not-fail"),
expResult: makeExpResult(
structs.Healthiness,
structs.CheckSuccess,
http.StatusCreated,
truncate,
),
}}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
logger := testlog.HCLogger(t)
c := New(logger)
c.(*checker).clock = clock
ctx := context.Background()
result := c.Do(ctx, tc.qc, tc.q)
must.Eq(t, tc.expResult, result)
})
}
}
// bigResponse creates a response payload larger than the maximum outputSizeLimit
// as well as the same response but truncated to length of outputSizeLimit
func bigResponse() (string, string) {
size := outputSizeLimit + 5
b := make([]byte, size, size)
for i := 0; i < size; i++ {
b[i] = 'a'
}
s := string(b)
return s, s[:outputSizeLimit]
}
func TestChecker_Do_TCP(t *testing.T) {
ci.Parallel(t)
// create a mock clock so we can assert time is set
now := time.Date(2022, 1, 2, 3, 4, 5, 6, time.UTC)
clock := libtimetest.NewClockMock(t).NowMock.Return(now)
makeQueryContext := func(address string, port int) *QueryContext {
return &QueryContext{
ID: "abc123",
CustomAddress: address,
ServicePortLabel: fmt.Sprintf("%d", port),
Networks: nil,
NetworkStatus: mock.NewNetworkStatus(address),
Ports: nil,
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
}
makeQuery := func(
kind structs.CheckMode,
port int,
) *Query {
return &Query{
Mode: kind,
Type: "tcp",
Timeout: 100 * time.Millisecond,
AddressMode: "auto",
PortLabel: fmt.Sprintf("%d", port),
}
}
makeExpResult := func(
kind structs.CheckMode,
status structs.CheckStatus,
output string,
) *structs.CheckQueryResult {
return &structs.CheckQueryResult{
ID: "abc123",
Mode: kind,
Status: status,
Output: output,
Timestamp: now.Unix(),
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
}
ports := freeport.MustTake(3)
defer freeport.Return(ports)
cases := []struct {
name string
qc *QueryContext
q *Query
tcpMode string // "ok", "off", "hang"
tcpPort int
expResult *structs.CheckQueryResult
}{{
name: "tcp ok",
qc: makeQueryContext("localhost", ports[0]),
q: makeQuery(structs.Healthiness, ports[0]),
tcpMode: "ok",
tcpPort: ports[0],
expResult: makeExpResult(
structs.Healthiness,
structs.CheckSuccess,
"nomad: tcp ok",
),
}, {
name: "tcp not listening",
qc: makeQueryContext("127.0.0.1", ports[1]),
q: makeQuery(structs.Healthiness, ports[1]),
tcpMode: "off",
tcpPort: ports[1],
expResult: makeExpResult(
structs.Healthiness,
structs.CheckFailure,
fmt.Sprintf("dial tcp 127.0.0.1:%d: connect: connection refused", ports[1]),
),
}, {
name: "tcp slow accept",
qc: makeQueryContext("localhost", ports[2]),
q: makeQuery(structs.Healthiness, ports[2]),
tcpMode: "hang",
tcpPort: ports[2],
expResult: makeExpResult(
structs.Healthiness,
structs.CheckFailure,
"dial tcp: lookup localhost: i/o timeout",
),
}}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
logger := testlog.HCLogger(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := New(logger)
c.(*checker).clock = clock
switch tc.tcpMode {
case "ok":
// simulate tcp server by listening
go tcpServer(ctx, tc.tcpPort)
case "hang":
// simulate tcp hang by setting an already expired context
timeout, stop := context.WithDeadline(ctx, now.Add(-1*time.Second))
defer stop()
ctx = timeout
case "off":
// simulate tcp dead connection by not listening
}
result := c.Do(ctx, tc.qc, tc.q)
must.Eq(t, tc.expResult, result)
})
}
}
func tcpServer(ctx context.Context, port int) {
var lc net.ListenConfig
l, _ := lc.Listen(ctx, "tcp", net.JoinHostPort(
"localhost", fmt.Sprintf("%d", port),
))
defer func() {
_ = l.Close()
}()
con, _ := l.Accept()
defer func() {
_ = con.Close()
}()
}

View File

@ -0,0 +1,102 @@
package checks
import (
"time"
"github.com/hashicorp/nomad/nomad/structs"
)
// GetCheckQuery extracts the needed info from c to actually execute the check.
func GetCheckQuery(c *structs.ServiceCheck) *Query {
var protocol = c.Protocol // ensure appropriate default
if c.Type == "http" && protocol == "" {
protocol = "http"
}
return &Query{
Mode: structs.GetCheckMode(c),
Type: c.Type,
Timeout: c.Timeout,
AddressMode: c.AddressMode,
PortLabel: c.PortLabel,
Path: c.Path,
Method: c.Method,
Protocol: protocol,
}
}
// A Query is derived from a ServiceCheck and contains the minimal
// amount of information needed to actually execute that check.
type Query struct {
Mode structs.CheckMode // readiness or healthiness
Type string // tcp or http
Timeout time.Duration // connection / request timeout
AddressMode string // host, driver, or alloc
PortLabel string // label or value
Protocol string // http checks only (http or https)
Path string // http checks only
Method string // http checks only
}
// A QueryContext contains allocation and service parameters necessary for
// address resolution.
type QueryContext struct {
ID structs.CheckID
CustomAddress string
ServicePortLabel string
Networks structs.Networks
NetworkStatus structs.NetworkStatus
Ports structs.AllocatedPorts
Group string
Task string
Service string
Check string
}
// Stub creates a temporary CheckQueryResult in the Pending state for the check
// with the given ID, representing a check that has not yet been executed.
func Stub(
id structs.CheckID, kind structs.CheckMode, now int64,
group, task, service, check string,
) *structs.CheckQueryResult {
return &structs.CheckQueryResult{
ID: id,
Mode: kind,
Status: structs.CheckPending,
Output: "nomad: waiting to run",
Timestamp: now,
Group: group,
Task: task,
Service: service,
Check: check,
}
}
// AllocationResults is a view of the check_id -> latest result for group and task
// checks in an allocation.
type AllocationResults map[structs.CheckID]*structs.CheckQueryResult
// diff returns the set of IDs in ids that are not in m.
func (m AllocationResults) diff(ids []structs.CheckID) []structs.CheckID {
var missing []structs.CheckID
for _, id := range ids {
if _, exists := m[id]; !exists {
missing = append(missing, id)
}
}
return missing
}
// ClientResults is a holistic view of alloc_id -> check_id -> latest result
// group and task checks across all allocations on a client.
type ClientResults map[string]AllocationResults
func (cr ClientResults) Insert(allocID string, result *structs.CheckQueryResult) {
if _, exists := cr[allocID]; !exists {
cr[allocID] = make(AllocationResults)
}
cr[allocID][result.ID] = result
}
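
A short sketch of turning a jobspec check into a Query and producing the pending Stub result a check reports before its first execution; the field values are illustrative:

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/client/serviceregistration/checks"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// an http check with no protocol set; GetCheckQuery fills in the
	// "http" default
	sc := &structs.ServiceCheck{
		Name:      "up",
		Type:      "http",
		Path:      "/health",
		PortLabel: "web",
		Interval:  10 * time.Second,
		Timeout:   2 * time.Second,
		Method:    "GET",
		OnUpdate:  "checks",
	}

	q := checks.GetCheckQuery(sc)
	fmt.Println(q.Type, q.Protocol, q.Mode)

	// before the first run, the check is represented by a pending stub
	stub := checks.Stub("id1", structs.Healthiness, time.Now().Unix(),
		"group", "task", "service", sc.Name)
	fmt.Println(stub.Status, stub.Output)
}
```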

View File

@ -0,0 +1,171 @@
package checks
import (
"testing"
"time"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
)
func TestChecks_GetCheckQuery(t *testing.T) {
cases := []struct {
name string
cType string
protocol string
onUpdate string
expMode structs.CheckMode
expProtocol string
}{
{
name: "http check and http set",
cType: "http",
protocol: "http",
onUpdate: "checks",
expMode: structs.Healthiness,
expProtocol: "http",
},
{
name: "http check and https set",
cType: "http",
protocol: "https",
onUpdate: "checks",
expMode: structs.Healthiness,
expProtocol: "https",
},
{
name: "http check and protocol unset",
cType: "http",
protocol: "",
onUpdate: "checks",
expMode: structs.Healthiness,
expProtocol: "http", // inherit default
},
{
name: "tcp check and protocol unset",
cType: "tcp",
protocol: "",
onUpdate: "checks",
expMode: structs.Healthiness,
expProtocol: "",
},
{
name: "http check and http set",
cType: "http",
protocol: "http",
onUpdate: "checks",
expMode: structs.Healthiness,
expProtocol: "http",
},
{
name: "on-update ignore",
cType: "http",
protocol: "http",
onUpdate: "ignore",
expMode: structs.Readiness,
expProtocol: "http",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
serviceCheck := &structs.ServiceCheck{
Type: tc.cType,
Path: "/",
Protocol: tc.protocol,
PortLabel: "web",
AddressMode: "host",
Interval: 10 * time.Second,
Timeout: 2 * time.Second,
Method: "GET",
OnUpdate: tc.onUpdate,
}
query := GetCheckQuery(serviceCheck)
must.Eq(t, tc.expMode, query.Mode)
must.Eq(t, tc.expProtocol, query.Protocol)
})
}
}
func TestChecks_Stub(t *testing.T) {
now := time.Date(2020, 1, 2, 3, 4, 5, 6, time.UTC).Unix()
result := Stub(
"abc123", // check id
structs.Healthiness, // kind
now, // timestamp
"group", "task", "service", "check",
)
must.Eq(t, &structs.CheckQueryResult{
ID: "abc123",
Mode: structs.Healthiness,
Status: structs.CheckPending,
Output: "nomad: waiting to run",
Timestamp: now,
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}, result)
}
func TestChecks_AllocationResults_diff(t *testing.T) {
cases := []struct {
name string
results []structs.CheckID
ids []structs.CheckID
exp []structs.CheckID
}{{
name: "empty results and empty ids",
results: nil,
ids: nil,
exp: nil,
}, {
name: "empty results and nonempty ids",
results: nil,
ids: []structs.CheckID{"aaa", "bbb"},
exp: []structs.CheckID{"aaa", "bbb"},
}, {
name: "nonempty results and empty ids",
results: []structs.CheckID{"aaa", "bbb"},
ids: nil,
exp: nil,
}, {
name: "mix",
results: []structs.CheckID{"aaa", "ccc", "ddd", "fff"},
ids: []structs.CheckID{"bbb", "ccc", "fff", "eee"},
exp: []structs.CheckID{"bbb", "eee"},
}}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
ar := make(AllocationResults)
for _, id := range tc.results {
ar[id] = nil
}
result := ar.diff(tc.ids)
must.Eq(t, tc.exp, result)
})
}
}
func TestChecks_ClientResults_Insert(t *testing.T) {
cr := make(ClientResults)
cr.Insert("alloc1", &structs.CheckQueryResult{ID: "qr1", Check: "c1"})
cr.Insert("alloc2", &structs.CheckQueryResult{ID: "qr2", Check: "c2"})
cr.Insert("alloc3", &structs.CheckQueryResult{ID: "qr3", Check: "c3"})
cr.Insert("alloc2", &structs.CheckQueryResult{ID: "qr2", Check: "c4"}) // overwrite
cr.Insert("alloc3", &structs.CheckQueryResult{ID: "qr4", Check: "c5"})
exp := ClientResults{
"alloc1": {
"qr1": &structs.CheckQueryResult{ID: "qr1", Check: "c1"},
},
"alloc2": {
"qr2": &structs.CheckQueryResult{ID: "qr2", Check: "c4"},
},
"alloc3": {
"qr3": &structs.CheckQueryResult{ID: "qr3", Check: "c3"},
"qr4": &structs.CheckQueryResult{ID: "qr4", Check: "c5"},
},
}
must.Eq(t, exp, cr)
}

View File

@ -108,6 +108,7 @@ func (a *AllocRegistration) NumChecks() int {
// ServiceRegistrations holds the status of services registered for a
// particular task or task group.
type ServiceRegistrations struct {
// Services maps service_id -> service registration
Services map[string]*ServiceRegistration
}

View File

@ -1,6 +1,7 @@
package state
import (
"bytes"
"fmt"
"os"
"path/filepath"
@ -11,6 +12,7 @@ import (
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/helper/boltdd"
"github.com/hashicorp/nomad/nomad/structs"
"go.etcd.io/bbolt"
@ -29,7 +31,9 @@ allocations/
|--> network_status -> networkStatusEntry{*structs.AllocNetworkStatus}
|--> task-<name>/
|--> local_state -> *trstate.LocalState # Local-only state
|--> task_state -> *structs.TaskState # Sync'd to servers
|--> task_state -> *structs.TaskState # Syncs to servers
|--> checks/
|--> check-<id> -> *structs.CheckState # Syncs to servers
devicemanager/
|--> plugin_state -> *dmstate.PluginState
@ -73,6 +77,9 @@ var (
// stored under
allocNetworkStatusKey = []byte("network_status")
// checkResultsBucket is the bucket name in which check query results are stored
checkResultsBucket = []byte("check_results")
// allocations -> $allocid -> task-$taskname -> the keys below
taskLocalStateKey = []byte("local_state")
taskStateKey = []byte("task_state")
@ -176,6 +183,7 @@ func (s *BoltStateDB) Name() string {
func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) {
var allocs []*structs.Allocation
var errs map[string]error
err := s.db.View(func(tx *boltdd.Tx) error {
allocs, errs = s.getAllAllocations(tx)
return nil
@ -195,7 +203,7 @@ type allocEntry struct {
}
func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) {
allocs := []*structs.Allocation{}
allocs := make([]*structs.Allocation, 0, 10)
errs := map[string]error{}
allocationsBkt := tx.Bucket(allocationsBucketName)
@ -717,6 +725,71 @@ func (s *BoltStateDB) GetDynamicPluginRegistryState() (*dynamicplugins.RegistryS
return ps, nil
}
func keyForCheck(allocID string, checkID structs.CheckID) []byte {
return []byte(fmt.Sprintf("%s_%s", allocID, checkID))
}
// PutCheckResult puts qr into the state store.
func (s *BoltStateDB) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
return s.db.Update(func(tx *boltdd.Tx) error {
bkt, err := tx.CreateBucketIfNotExists(checkResultsBucket)
if err != nil {
return err
}
key := keyForCheck(allocID, qr.ID)
return bkt.Put(key, qr)
})
}
// GetCheckResults gets all stored check results, for every allocation, from the state store.
func (s *BoltStateDB) GetCheckResults() (checks.ClientResults, error) {
m := make(checks.ClientResults)
err := s.db.View(func(tx *boltdd.Tx) error {
bkt := tx.Bucket(checkResultsBucket)
if bkt == nil {
return nil // nothing set yet
}
if err := boltdd.Iterate(bkt, nil, func(key []byte, qr structs.CheckQueryResult) {
parts := bytes.SplitN(key, []byte("_"), 2)
allocID, _ := parts[0], parts[1]
m.Insert(string(allocID), &qr)
}); err != nil {
return err
}
return nil
})
return m, err
}
func (s *BoltStateDB) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
return s.db.Update(func(tx *boltdd.Tx) error {
bkt := tx.Bucket(checkResultsBucket)
if bkt == nil {
return nil // nothing set yet
}
for _, id := range checkIDs {
key := keyForCheck(allocID, id)
if err := bkt.Delete(key); err != nil {
return err
}
}
return nil
})
}
func (s *BoltStateDB) PurgeCheckResults(allocID string) error {
return s.db.Update(func(tx *boltdd.Tx) error {
bkt := tx.Bucket(checkResultsBucket)
if bkt == nil {
return nil // nothing set yet
}
return bkt.DeletePrefix([]byte(allocID + "_"))
})
}
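
A small standalone sketch of the key scheme used above, in which every result lives in the flat check_results bucket under a "<allocID>_<checkID>" key; this is what lets GetCheckResults recover the allocation ID with a single split and PurgeCheckResults delete an allocation's entries by prefix:

```go
package main

import (
	"bytes"
	"fmt"
)

// mirrors keyForCheck above: one flat bucket, keys of the form "<allocID>_<checkID>"
func keyForCheck(allocID, checkID string) []byte {
	return []byte(fmt.Sprintf("%s_%s", allocID, checkID))
}

func main() {
	key := keyForCheck("alloc1", "id1")

	// GetCheckResults splits on the first '_' only, so check IDs may contain
	// underscores as long as allocation IDs (UUIDs) never do
	parts := bytes.SplitN(key, []byte("_"), 2)
	fmt.Println(string(parts[0]), string(parts[1])) // alloc1 id1

	// PurgeCheckResults removes every key sharing the allocation prefix
	prefix := []byte("alloc1" + "_")
	fmt.Println(bytes.HasPrefix(key, prefix)) // true
}
```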
// init initializes metadata entries in a newly created state database.
func (s *BoltStateDB) init() error {
return s.db.Update(func(tx *boltdd.Tx) error {

View File

@ -7,6 +7,7 @@ import (
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -80,8 +81,6 @@ func (m *ErrDB) PutDynamicPluginRegistryState(state *dynamicplugins.RegistryStat
return fmt.Errorf("Error!")
}
// GetDevicePluginState stores the device manager's plugin state or returns an
// error.
func (m *ErrDB) GetDevicePluginState() (*dmstate.PluginState, error) {
return nil, fmt.Errorf("Error!")
}
@ -94,9 +93,22 @@ func (m *ErrDB) PutDriverPluginState(ps *driverstate.PluginState) error {
return fmt.Errorf("Error!")
}
func (m *ErrDB) Close() error {
func (m *ErrDB) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
return fmt.Errorf("Error!")
}
// Ensure *ErrDB implements StateDB
var _ StateDB = (*ErrDB)(nil)
func (m *ErrDB) GetCheckResults() (checks.ClientResults, error) {
return nil, fmt.Errorf("Error!")
}
func (m *ErrDB) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
return fmt.Errorf("Error!")
}
func (m *ErrDB) PurgeCheckResults(allocID string) error {
return fmt.Errorf("Error!")
}
func (m *ErrDB) Close() error {
return fmt.Errorf("Error!")
}

View File

@ -3,11 +3,13 @@ package state
import (
"sync"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -27,6 +29,9 @@ type MemDB struct {
localTaskState map[string]map[string]*state.LocalState
taskState map[string]map[string]*structs.TaskState
// alloc_id -> check_id -> result
checks checks.ClientResults
// devicemanager -> plugin-state
devManagerPs *dmstate.PluginState
@ -49,6 +54,7 @@ func NewMemDB(logger hclog.Logger) *MemDB {
networkStatus: make(map[string]*structs.AllocNetworkStatus),
localTaskState: make(map[string]map[string]*state.LocalState),
taskState: make(map[string]map[string]*structs.TaskState),
checks: make(checks.ClientResults),
logger: logger,
}
}
@ -73,7 +79,7 @@ func (m *MemDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, er
return allocs, map[string]error{}, nil
}
func (m *MemDB) PutAllocation(alloc *structs.Allocation, opts ...WriteOption) error {
func (m *MemDB) PutAllocation(alloc *structs.Allocation, _ ...WriteOption) error {
m.mu.Lock()
defer m.mu.Unlock()
m.allocs[alloc.ID] = alloc
@ -99,7 +105,7 @@ func (m *MemDB) GetNetworkStatus(allocID string) (*structs.AllocNetworkStatus, e
return m.networkStatus[allocID], nil
}
func (m *MemDB) PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus, opts ...WriteOption) error {
func (m *MemDB) PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus, _ ...WriteOption) error {
m.mu.Lock()
m.networkStatus[allocID] = ns
defer m.mu.Unlock()
@ -175,7 +181,7 @@ func (m *MemDB) DeleteTaskBucket(allocID, taskName string) error {
return nil
}
func (m *MemDB) DeleteAllocationBucket(allocID string, opts ...WriteOption) error {
func (m *MemDB) DeleteAllocationBucket(allocID string, _ ...WriteOption) error {
m.mu.Lock()
defer m.mu.Unlock()
@ -227,6 +233,40 @@ func (m *MemDB) PutDynamicPluginRegistryState(ps *dynamicplugins.RegistryState)
return nil
}
func (m *MemDB) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
m.mu.Lock()
defer m.mu.Unlock()
if _, exists := m.checks[allocID]; !exists {
m.checks[allocID] = make(checks.AllocationResults)
}
m.checks[allocID][qr.ID] = qr
return nil
}
func (m *MemDB) GetCheckResults() (checks.ClientResults, error) {
m.mu.Lock()
defer m.mu.Unlock()
return helper.CopyMap(m.checks), nil
}
func (m *MemDB) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
m.mu.Lock()
defer m.mu.Unlock()
for _, id := range checkIDs {
delete(m.checks[allocID], id)
}
return nil
}
func (m *MemDB) PurgeCheckResults(allocID string) error {
m.mu.Lock()
defer m.mu.Unlock()
delete(m.checks, allocID)
return nil
}
func (m *MemDB) Close() error {
m.mu.Lock()
defer m.mu.Unlock()

View File

@ -5,6 +5,7 @@ import (
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -87,6 +88,22 @@ func (n NoopDB) GetDynamicPluginRegistryState() (*dynamicplugins.RegistryState,
return nil, nil
}
func (n NoopDB) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
return nil
}
func (n NoopDB) GetCheckResults() (checks.ClientResults, error) {
return nil, nil
}
func (n NoopDB) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
return nil
}
func (n NoopDB) PurgeCheckResults(allocID string) error {
return nil
}
func (n NoopDB) Close() error {
return nil
}

View File

@ -16,23 +16,32 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
// assert each implementation satisfies StateDB interface
var (
_ StateDB = (*BoltStateDB)(nil)
_ StateDB = (*MemDB)(nil)
_ StateDB = (*NoopDB)(nil)
_ StateDB = (*ErrDB)(nil)
)
func setupBoltStateDB(t *testing.T) *BoltStateDB {
dir := t.TempDir()
db, err := NewBoltStateDB(testlog.HCLogger(t), dir)
if err != nil {
if err := os.RemoveAll(dir); err != nil {
t.Logf("error removing boltdb dir: %v", err)
if rmErr := os.RemoveAll(dir); rmErr != nil {
t.Logf("error removing boltdb dir: %v", rmErr)
}
t.Fatalf("error creating boltdb: %v", err)
}
t.Cleanup(func() {
if err := db.Close(); err != nil {
t.Errorf("error closing boltdb: %v", err)
if closeErr := db.Close(); closeErr != nil {
t.Errorf("error closing boltdb: %v", closeErr)
}
})
@ -40,14 +49,12 @@ func setupBoltStateDB(t *testing.T) *BoltStateDB {
}
func testDB(t *testing.T, f func(*testing.T, StateDB)) {
boltdb := setupBoltStateDB(t)
dbs := []StateDB{
setupBoltStateDB(t),
NewMemDB(testlog.HCLogger(t)),
}
memdb := NewMemDB(testlog.HCLogger(t))
impls := []StateDB{boltdb, memdb}
for _, db := range impls {
db := db
for _, db := range dbs {
t.Run(db.Name(), func(t *testing.T) {
f(t, db)
})
@ -374,6 +381,79 @@ func TestStateDB_DynamicRegistry(t *testing.T) {
})
}
func TestStateDB_CheckResult_keyForCheck(t *testing.T) {
ci.Parallel(t)
allocID := "alloc1"
checkID := structs.CheckID("id1")
result := keyForCheck(allocID, checkID)
exp := allocID + "_" + string(checkID)
must.Eq(t, exp, string(result))
}
func TestStateDB_CheckResult(t *testing.T) {
ci.Parallel(t)
qr := func(id string) *structs.CheckQueryResult {
return &structs.CheckQueryResult{
ID: structs.CheckID(id),
Mode: "healthiness",
Status: "passing",
Output: "nomad: tcp ok",
Timestamp: 1,
Group: "group",
Task: "task",
Service: "service",
Check: "check",
}
}
testDB(t, func(t *testing.T, db StateDB) {
t.Run("put and get", func(t *testing.T) {
err := db.PutCheckResult("alloc1", qr("abc123"))
must.NoError(t, err)
results, err := db.GetCheckResults()
must.NoError(t, err)
must.MapContainsKeys(t, results, []string{"alloc1"})
must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"abc123"})
})
})
testDB(t, func(t *testing.T, db StateDB) {
t.Run("delete", func(t *testing.T) {
must.NoError(t, db.PutCheckResult("alloc1", qr("id1")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id2")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id3")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id4")))
must.NoError(t, db.PutCheckResult("alloc2", qr("id5")))
err := db.DeleteCheckResults("alloc1", []structs.CheckID{"id2", "id3"})
must.NoError(t, err)
results, err := db.GetCheckResults()
must.NoError(t, err)
must.MapContainsKeys(t, results, []string{"alloc1", "alloc2"})
must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"id1", "id4"})
must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
})
})
testDB(t, func(t *testing.T, db StateDB) {
t.Run("purge", func(t *testing.T) {
must.NoError(t, db.PutCheckResult("alloc1", qr("id1")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id2")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id3")))
must.NoError(t, db.PutCheckResult("alloc1", qr("id4")))
must.NoError(t, db.PutCheckResult("alloc2", qr("id5")))
err := db.PurgeCheckResults("alloc1")
must.NoError(t, err)
results, err := db.GetCheckResults()
must.NoError(t, err)
must.MapContainsKeys(t, results, []string{"alloc2"})
must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
})
})
}
// TestStateDB_Upgrade asserts calling Upgrade on new databases always
// succeeds.
func TestStateDB_Upgrade(t *testing.T) {

View File

@ -5,6 +5,7 @@ import (
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -28,14 +29,16 @@ type StateDB interface {
// not be stored.
PutAllocation(*structs.Allocation, ...WriteOption) error
// Get/Put DeploymentStatus get and put the allocation's deployment
// status. It may be nil.
// GetDeploymentStatus gets the allocation's deployment status. It may be nil.
GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error)
// PutDeploymentStatus sets the allocation's deployment status. It may be nil.
PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error
// Get/Put NetworkStatus get and put the allocation's network
// status. It may be nil.
// GetNetworkStatus gets the allocation's network status. It may be nil.
GetNetworkStatus(allocID string) (*structs.AllocNetworkStatus, error)
// PutNetworkStatus puts the allocation's network status. It may be nil.
PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus, opts ...WriteOption) error
// GetTaskRunnerState returns the LocalState and TaskState for a
@ -43,7 +46,7 @@ type StateDB interface {
// error is encountered only the error will be non-nil.
GetTaskRunnerState(allocID, taskName string) (*state.LocalState, *structs.TaskState, error)
// PutTaskRunnerLocalTask stores the LocalState for a TaskRunner or
// PutTaskRunnerLocalState stores the LocalState for a TaskRunner or
// returns an error.
PutTaskRunnerLocalState(allocID, taskName string, val *state.LocalState) error
@ -81,6 +84,18 @@ type StateDB interface {
// PutDynamicPluginRegistryState is used to store the dynamic plugin manager's state.
PutDynamicPluginRegistryState(state *dynamicplugins.RegistryState) error
// PutCheckResult sets the query result for the check implied in qr.
PutCheckResult(allocID string, qr *structs.CheckQueryResult) error
// DeleteCheckResults removes the given set of check results.
DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error
// PurgeCheckResults removes all check results of the given allocation.
PurgeCheckResults(allocID string) error
// GetCheckResults is used to restore the set of check results on this Client.
GetCheckResults() (checks.ClientResults, error)
// Close the database. Unsafe for further use after calling regardless
// of return value.
Close() error
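
The four check-result methods keep results keyed by allocation and then by check ID. A hedged in-memory sketch of those semantics follows; it uses a plain nested map instead of the real checks.ClientResults type, and the actual MemDB/BoltStateDB implementations differ.

// Illustrative only: a tiny in-memory store mirroring the shape of the
// check-result methods added to StateDB, not the real implementation.
package main

import (
	"sync"

	"github.com/hashicorp/nomad/nomad/structs"
)

type memCheckStore struct {
	lock    sync.Mutex
	results map[string]map[structs.CheckID]*structs.CheckQueryResult
}

func newMemCheckStore() *memCheckStore {
	return &memCheckStore{
		results: make(map[string]map[structs.CheckID]*structs.CheckQueryResult),
	}
}

// PutCheckResult stores qr under its own check ID for the given allocation.
func (m *memCheckStore) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
	m.lock.Lock()
	defer m.lock.Unlock()
	if m.results[allocID] == nil {
		m.results[allocID] = make(map[structs.CheckID]*structs.CheckQueryResult)
	}
	m.results[allocID][qr.ID] = qr
	return nil
}

// DeleteCheckResults removes only the listed check IDs for the allocation.
func (m *memCheckStore) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
	m.lock.Lock()
	defer m.lock.Unlock()
	for _, id := range checkIDs {
		delete(m.results[allocID], id)
	}
	return nil
}

// PurgeCheckResults drops every result belonging to the allocation.
func (m *memCheckStore) PurgeCheckResults(allocID string) error {
	m.lock.Lock()
	defer m.lock.Unlock()
	delete(m.results, allocID)
	return nil
}

func main() {
	s := newMemCheckStore()
	_ = s.PutCheckResult("alloc1", &structs.CheckQueryResult{ID: "abc123", Status: structs.CheckSuccess})
	_ = s.DeleteCheckResults("alloc1", []structs.CheckID{"abc123"})
	_ = s.PurgeCheckResults("alloc1")
}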

View File

@ -4,7 +4,7 @@ import (
"errors"
"sync"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -32,7 +32,7 @@ type AllocBroadcaster struct {
// nextId is the next id to assign in listener map
nextId int
// closed is true if broadcsater is closed
// closed is true if broadcaster is closed.
closed bool
// last alloc sent to prime new listeners

View File

@ -182,6 +182,20 @@ type AllocExecRequest struct {
structs.QueryOptions
}
// AllocChecksRequest is used to request the latest nomad service discovery
// check status information of a given allocation.
type AllocChecksRequest struct {
structs.QueryOptions
AllocID string
}
// AllocChecksResponse is used to return the latest nomad service discovery
// check status information of a given allocation.
type AllocChecksResponse struct {
structs.QueryMeta
Results map[structs.CheckID]*structs.CheckQueryResult
}
// AllocStatsRequest is used to request the resource usage of a given
// allocation, potentially filtering by task
type AllocStatsRequest struct {

View File

@ -84,6 +84,8 @@ func (s *HTTPServer) AllocSpecificRequest(resp http.ResponseWriter, req *http.Re
}
switch tokens[1] {
case "checks":
return s.allocChecks(allocID, resp, req)
case "stop":
return s.allocStop(allocID, resp, req)
case "services":
@ -213,6 +215,8 @@ func (s *HTTPServer) ClientAllocRequest(resp http.ResponseWriter, req *http.Requ
}
allocID := tokens[0]
switch tokens[1] {
case "checks":
return s.allocChecks(allocID, resp, req)
case "stats":
return s.allocStats(allocID, resp, req)
case "exec":
@ -436,6 +440,39 @@ func (s *HTTPServer) allocStats(allocID string, resp http.ResponseWriter, req *h
return reply.Stats, rpcErr
}
func (s *HTTPServer) allocChecks(allocID string, resp http.ResponseWriter, req *http.Request) (any, error) {
// Build the request and parse the ACL token
args := cstructs.AllocChecksRequest{
AllocID: allocID,
}
s.parse(resp, req, &args.QueryOptions.Region, &args.QueryOptions)
// Determine the handler to use
useLocalClient, useClientRPC, useServerRPC := s.rpcHandlerForAlloc(allocID)
// Make the RPC
var reply cstructs.AllocChecksResponse
var rpcErr error
switch {
case useLocalClient:
rpcErr = s.agent.Client().ClientRPC("Allocations.Checks", &args, &reply)
case useClientRPC:
rpcErr = s.agent.Client().RPC("Allocations.Checks", &args, &reply)
case useServerRPC:
rpcErr = s.agent.Server().RPC("Allocations.Checks", &args, &reply)
default:
rpcErr = CodedError(400, "No local Node and node_id not provided")
}
if rpcErr != nil {
if structs.IsErrNoNodeConn(rpcErr) || structs.IsErrUnknownAllocation(rpcErr) {
rpcErr = CodedError(404, rpcErr.Error())
}
}
return reply.Results, rpcErr
}
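
For completeness, the new handler can be exercised like any other alloc-scoped client route. The sketch below is a hedged example: it assumes the conventional /v1/client/allocation/<id>/checks path and a local agent on the default port, and decodes the Results map returned above as generic JSON. The allocation ID is hypothetical.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	allocID := "9afbfa5a-hypothetical-alloc" // hypothetical allocation ID
	url := "http://127.0.0.1:4646/v1/client/allocation/" + allocID + "/checks"

	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// the handler returns reply.Results, a map of check ID to check query result
	var results map[string]map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&results); err != nil {
		panic(err)
	}
	for id, result := range results {
		fmt.Println(id, result["Status"], result["Output"])
	}
}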
func (s *HTTPServer) allocExec(allocID string, resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Build the request and parse the ACL token
task := req.URL.Query().Get("task")

View File

@ -87,7 +87,7 @@ func commandAssetsConnectShortNomad() (*asset, error) {
return nil, err
}
info := bindataFileInfo{name: "command/assets/connect-short.nomad", size: 1071, mode: os.FileMode(420), modTime: time.Unix(1654712611, 0)}
info := bindataFileInfo{name: "command/assets/connect-short.nomad", size: 1071, mode: os.FileMode(436), modTime: time.Unix(1656778128, 0)}
a := &asset{bytes: bytes, info: info}
return a, nil
}
@ -107,7 +107,7 @@ func commandAssetsConnectNomad() (*asset, error) {
return nil, err
}
info := bindataFileInfo{name: "command/assets/connect.nomad", size: 18126, mode: os.FileMode(420), modTime: time.Unix(1654712611, 0)}
info := bindataFileInfo{name: "command/assets/connect.nomad", size: 18126, mode: os.FileMode(436), modTime: time.Unix(1656778128, 0)}
a := &asset{bytes: bytes, info: info}
return a, nil
}
@ -127,7 +127,7 @@ func commandAssetsExampleShortNomad() (*asset, error) {
return nil, err
}
info := bindataFileInfo{name: "command/assets/example-short.nomad", size: 369, mode: os.FileMode(420), modTime: time.Unix(1654554902, 0)}
info := bindataFileInfo{name: "command/assets/example-short.nomad", size: 369, mode: os.FileMode(436), modTime: time.Unix(1656778128, 0)}
a := &asset{bytes: bytes, info: info}
return a, nil
}
@ -147,7 +147,7 @@ func commandAssetsExampleNomad() (*asset, error) {
return nil, err
}
info := bindataFileInfo{name: "command/assets/example.nomad", size: 16351, mode: os.FileMode(420), modTime: time.Unix(1654554902, 0)}
info := bindataFileInfo{name: "command/assets/example.nomad", size: 16351, mode: os.FileMode(436), modTime: time.Unix(1656778128, 0)}
a := &asset{bytes: bytes, info: info}
return a, nil
}

View File

@ -356,6 +356,8 @@ func (b *Bucket) Get(key []byte, obj interface{}) error {
// Iterate iterates each key in Bucket b that starts with prefix. fn is called on
// the key and msg-pack decoded value. If prefix is empty or nil, all keys in the
// bucket are iterated.
//
// b must already exist.
func Iterate[T any](b *Bucket, prefix []byte, fn func([]byte, T)) error {
c := b.boltBucket.Cursor()
for k, data := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, data = c.Next() {
@ -371,6 +373,8 @@ func Iterate[T any](b *Bucket, prefix []byte, fn func([]byte, T)) error {
// DeletePrefix removes all keys starting with prefix from the bucket. If no keys
// with prefix exist then nothing is done and a nil error is returned. Returns an
// error if the bucket was created from a read-only transaction.
//
// b must already exist.
func (b *Bucket) DeletePrefix(prefix []byte) error {
c := b.boltBucket.Cursor()
for k, _ := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, _ = c.Next() {

View File

@ -245,6 +245,8 @@ func SliceStringIsSubset(larger, smaller []string) (bool, []string) {
}
// SliceStringContains returns whether item exists at least once in list.
//
// Deprecated; use slices.Contains instead.
func SliceStringContains(list []string, item string) bool {
for _, s := range list {
if s == item {

View File

@ -10,14 +10,13 @@ import (
metrics "github.com/armon/go-metrics"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/acl"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/nomad/structs"
)
// ClientAllocations is used to forward RPC requests to the targed Nomad client's
// ClientAllocations is used to forward RPC requests to the targeted Nomad client's
// Allocation endpoint.
type ClientAllocations struct {
srv *Server

30
nomad/mock/checks.go Normal file
View File

@ -0,0 +1,30 @@
package mock
import (
"github.com/hashicorp/nomad/nomad/structs"
)
// CheckShim is a mock implementation of checkstore.Shim
//
// So far the implementation does nothing.
type CheckShim struct{}
func (s *CheckShim) Set(allocID string, result *structs.CheckQueryResult) error {
return nil
}
func (s *CheckShim) List(allocID string) map[structs.CheckID]*structs.CheckQueryResult {
return nil
}
func (s *CheckShim) Difference(allocID string, ids []structs.CheckID) []structs.CheckID {
return nil
}
func (s *CheckShim) Remove(allocID string, ids []structs.CheckID) error {
return nil
}
func (s *CheckShim) Purge(allocID string) error {
return nil
}

19
nomad/mock/network.go Normal file
View File

@ -0,0 +1,19 @@
package mock
import (
"github.com/hashicorp/nomad/nomad/structs"
)
// NetworkStatus is a mock implementation of structs.NetworkStatus
type NetworkStatus struct {
address string
}
// NewNetworkStatus creates a mock NetworkStatus based on address.
func NewNetworkStatus(address string) structs.NetworkStatus {
return &NetworkStatus{address: address}
}
func (ns *NetworkStatus) NetworkStatus() *structs.AllocNetworkStatus {
return &structs.AllocNetworkStatus{Address: ns.address}
}
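
A brief hedged sketch of how this mock would be consumed; it satisfies the structs.NetworkStatus interface added later in this diff and always reports the address it was created with.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// the mock provides a fixed advertise address via structs.NetworkStatus
	var ns structs.NetworkStatus = mock.NewNetworkStatus("127.0.0.1")
	fmt.Println(ns.NetworkStatus().Address) // 127.0.0.1
}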

104
nomad/structs/check_test.go Normal file
View File

@ -0,0 +1,104 @@
package structs
import (
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/shoenig/test/must"
)
func TestChecks_NomadCheckID(t *testing.T) {
ci.Parallel(t)
orig := ServiceCheck{
Name: "c1",
Type: "http",
Path: "/health",
Protocol: "https",
PortLabel: "web",
AddressMode: "host",
Interval: 1 * time.Minute,
Timeout: 10 * time.Second,
Method: "GET",
TaskName: "t1",
OnUpdate: "ignore",
}
different := func(a, b ServiceCheck) bool {
idA := NomadCheckID("id", "group", &a)
idB := NomadCheckID("id", "group", &b)
return idA != idB
}
t.Run("same", func(t *testing.T) {
c := orig
must.False(t, different(orig, c))
})
t.Run("different name", func(t *testing.T) {
c := orig
c.Name = "c2"
must.True(t, different(orig, c))
})
t.Run("different type", func(t *testing.T) {
c := orig
c.Type = "tcp"
must.True(t, different(orig, c))
})
t.Run("different path", func(t *testing.T) {
c := orig
c.Path = "/metrics"
must.True(t, different(orig, c))
})
t.Run("different protocol", func(t *testing.T) {
c := orig
c.Protocol = "http"
must.True(t, different(orig, c))
})
t.Run("different port label", func(t *testing.T) {
c := orig
c.PortLabel = "ingress"
must.True(t, different(orig, c))
})
t.Run("different address mode", func(t *testing.T) {
c := orig
c.AddressMode = "bridge"
must.True(t, different(orig, c))
})
t.Run("different interval", func(t *testing.T) {
c := orig
c.Interval = 1 * time.Second
must.True(t, different(orig, c))
})
t.Run("different timeout", func(t *testing.T) {
c := orig
c.Timeout = 5 * time.Second
must.True(t, different(orig, c))
})
t.Run("different method", func(t *testing.T) {
c := orig
c.Method = "POST"
must.True(t, different(orig, c))
})
t.Run("different task", func(t *testing.T) {
c := orig
c.TaskName = "task2"
must.True(t, different(orig, c))
})
t.Run("different on update", func(t *testing.T) {
c := orig
c.OnUpdate = "checks"
must.True(t, different(orig, c))
})
}

92
nomad/structs/checks.go Normal file
View File

@ -0,0 +1,92 @@
package structs
import (
"crypto/md5"
"fmt"
)
// The CheckMode of a Nomad check is either Healthiness or Readiness.
type CheckMode string
const (
// A Healthiness check is useful in the context of ensuring a service
// is capable of performing its duties. This is an indicator that a check's
// on_update configuration is set to "check_result", implying that Deployments
// will not move forward while the check is failing.
Healthiness CheckMode = "healthiness"
// A Readiness check is useful in the context of ensuring a service
// should be performing its duties (regardless of healthiness). This is an
// indicator that the check's on_update configuration is set to "ignore",
// implying that Deployments will move forward regardless of whether the
// check is failing.
Readiness CheckMode = "readiness"
)
// GetCheckMode determines whether the check is readiness or healthiness.
func GetCheckMode(c *ServiceCheck) CheckMode {
if c != nil && c.OnUpdate == "ignore" {
return Readiness
}
return Healthiness
}
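
A quick illustration of the mapping: only GetCheckMode and the ServiceCheck fields shown in this diff are assumed, and the field values are made up.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// on_update = "ignore" makes the check a readiness check
	readiness := &structs.ServiceCheck{Type: "http", Path: "/health", OnUpdate: "ignore"}

	// any other on_update value (including unset) is treated as healthiness
	healthiness := &structs.ServiceCheck{Type: "tcp"}

	fmt.Println(structs.GetCheckMode(readiness))   // readiness
	fmt.Println(structs.GetCheckMode(healthiness)) // healthiness
}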
// A CheckID is unique to a check.
type CheckID string
// A CheckQueryResult represents the outcome of a single execution of a Nomad service
// check. It records the result, the output, and when the execution took place.
// Advanced check math (e.g. success_before_passing) is left to the calling
// context.
type CheckQueryResult struct {
ID CheckID
Mode CheckMode
Status CheckStatus
StatusCode int `json:",omitempty"`
Output string
Timestamp int64
// check coordinates
Group string
Task string `json:",omitempty"`
Service string
Check string
}
func (r *CheckQueryResult) String() string {
return fmt.Sprintf("(%s %s %s %v)", r.ID, r.Mode, r.Status, r.Timestamp)
}
// A CheckStatus is the result of executing a check. The status of a query is
// ternary - success, failure, or pending (not yet executed). Deployments treat
// pending and failure as the same - a deployment does not continue until a check
// is passing (unless on_update=ignore).
type CheckStatus string
const (
CheckSuccess CheckStatus = "success"
CheckFailure CheckStatus = "failure"
CheckPending CheckStatus = "pending"
)
// NomadCheckID returns an ID unique to the nomad service check.
//
// Checks of group-level services have no task.
func NomadCheckID(allocID, group string, c *ServiceCheck) CheckID {
sum := md5.New()
hashString(sum, allocID)
hashString(sum, group)
hashString(sum, c.TaskName)
hashString(sum, c.Name)
hashString(sum, c.Type)
hashString(sum, c.PortLabel)
hashString(sum, c.OnUpdate)
hashString(sum, c.AddressMode)
hashDuration(sum, c.Interval)
hashDuration(sum, c.Timeout)
hashString(sum, c.Protocol)
hashString(sum, c.Path)
hashString(sum, c.Method)
h := sum.Sum(nil)
return CheckID(fmt.Sprintf("%x", h))
}
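
A hedged example of deriving an ID for a group-level check; the allocation ID, group name, and check field values are made up.

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	check := &structs.ServiceCheck{ // hypothetical values
		Name:      "healthz",
		Type:      "http",
		Path:      "/healthz",
		PortLabel: "web",
		Interval:  10 * time.Second,
		Timeout:   2 * time.Second,
	}
	// group-level check: TaskName is empty, so only the alloc, group, and the
	// check's own fields contribute to the hash
	id := structs.NomadCheckID("8b5f0c3a-hypothetical-alloc", "web", check)
	fmt.Println(id) // stable hex digest; changes if any hashed field changes
}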

View File

@ -2,6 +2,7 @@ package structs
import (
"crypto/sha1"
"encoding/binary"
"errors"
"fmt"
"hash"
@ -19,6 +20,7 @@ import (
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/args"
"github.com/mitchellh/copystructure"
"golang.org/x/exp/slices"
)
const (
@ -41,7 +43,7 @@ const (
// ServiceCheck represents the Consul health check.
type ServiceCheck struct {
Name string // Name of the check, defaults to id
Name string // Name of the check, defaults to a generated label
Type string // Type of the check - tcp, http, docker and script
Command string // Command is the command to run for script checks
Args []string // Args is a list of arguments for script checks
@ -66,6 +68,12 @@ type ServiceCheck struct {
OnUpdate string
}
// IsReadiness returns whether the configuration of the ServiceCheck is effectively
// a readiness check - i.e. check failures do not affect a deployment.
func (sc *ServiceCheck) IsReadiness() bool {
return sc != nil && sc.OnUpdate == "ignore"
}
// Copy the stanza recursively. Returns nil if nil.
func (sc *ServiceCheck) Copy() *ServiceCheck {
if sc == nil {
@ -207,48 +215,49 @@ func (sc *ServiceCheck) Canonicalize(serviceName string) {
}
}
// validate a Service's ServiceCheck
func (sc *ServiceCheck) validate() error {
// Validate Type
// validateCommon validates the parts of ServiceCheck shared across providers.
func (sc *ServiceCheck) validateCommon(allowableTypes []string) error {
// validate the type is allowable (different between nomad, consul checks)
checkType := strings.ToLower(sc.Type)
if !slices.Contains(allowableTypes, checkType) {
s := strings.Join(allowableTypes, ", ")
return fmt.Errorf(`invalid check type (%q), must be one of %s`, checkType, s)
}
// validate specific check types
switch checkType {
case ServiceCheckGRPC:
case ServiceCheckTCP:
case ServiceCheckHTTP:
if sc.Path == "" {
return fmt.Errorf("http type must have a valid http path")
return fmt.Errorf("http type must have http path")
}
checkPath, err := url.Parse(sc.Path)
if err != nil {
return fmt.Errorf("http type must have a valid http path")
checkPath, pathErr := url.Parse(sc.Path)
if pathErr != nil {
return fmt.Errorf("http type must have valid http path")
}
if checkPath.IsAbs() {
return fmt.Errorf("http type must have a relative http path")
return fmt.Errorf("http type must have relative http path")
}
case ServiceCheckScript:
if sc.Command == "" {
return fmt.Errorf("script type must have a valid script path")
}
default:
return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
}
// Validate interval and timeout
// validate interval
if sc.Interval == 0 {
return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval)
} else if sc.Interval < minCheckInterval {
return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval)
}
// validate timeout
if sc.Timeout == 0 {
return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
} else if sc.Timeout < minCheckTimeout {
return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
}
// Validate InitialStatus
// validate the initial status
switch sc.InitialStatus {
case "":
case api.HealthPassing:
@ -256,10 +265,9 @@ func (sc *ServiceCheck) validate() error {
case api.HealthCritical:
default:
return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical)
}
// Validate AddressMode
// validate address_mode
switch sc.AddressMode {
case "", AddressModeHost, AddressModeDriver, AddressModeAlloc:
// Ok
@ -269,7 +277,7 @@ func (sc *ServiceCheck) validate() error {
return fmt.Errorf("invalid address_mode %q", sc.AddressMode)
}
// Validate OnUpdate
// validate on_update
switch sc.OnUpdate {
case "", OnUpdateIgnore, OnUpdateRequireHealthy, OnUpdateIgnoreWarn:
// OK
@ -277,9 +285,102 @@ func (sc *ServiceCheck) validate() error {
return fmt.Errorf("on_update must be %q, %q, or %q; got %q", OnUpdateRequireHealthy, OnUpdateIgnoreWarn, OnUpdateIgnore, sc.OnUpdate)
}
// validate check_restart and on_update do not conflict
if sc.CheckRestart != nil {
// CheckRestart and OnUpdate Ignore are incompatible. If OnUpdate treats
// an error as healthy, and the deployment succeeds followed by
// check_restart restarting failing checks, the deployment is left in an
// odd state.
if sc.OnUpdate == OnUpdateIgnore {
return fmt.Errorf("on_update value %q is not compatible with check_restart", sc.OnUpdate)
}
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
// ignore_warnings
if !sc.CheckRestart.IgnoreWarnings && sc.OnUpdate == OnUpdateIgnoreWarn {
return fmt.Errorf("on_update value %q not supported with check_restart ignore_warnings value %q", sc.OnUpdate, strconv.FormatBool(sc.CheckRestart.IgnoreWarnings))
}
}
// validate check_restart
if err := sc.CheckRestart.Validate(); err != nil {
return err
}
return nil
}
// validate a Service's ServiceCheck in the context of the Nomad provider.
func (sc *ServiceCheck) validateNomad() error {
allowable := []string{ServiceCheckTCP, ServiceCheckHTTP}
if err := sc.validateCommon(allowable); err != nil {
return err
}
// expose is connect (consul) specific
if sc.Expose {
return fmt.Errorf("expose may only be set for Consul service checks")
}
// nomad checks do not have warnings
if sc.OnUpdate == "ignore_warnings" {
return fmt.Errorf("on_update may only be set to ignore_warnings for Consul service checks")
}
// below are temporary limitations on checks in nomad
// https://github.com/hashicorp/team-nomad/issues/354
// check_restart not yet supported on nomad
if sc.CheckRestart != nil {
return fmt.Errorf("check_restart may only be set for Consul service checks")
}
// address_mode="driver" not yet supported on nomad
if sc.AddressMode == "driver" {
return fmt.Errorf("address_mode = driver may only be set for Consul service checks")
}
if sc.Type == "http" {
if sc.Method != "" && sc.Method != "GET" {
// unset turns into GET
return fmt.Errorf("http checks may only use GET method in Nomad services")
}
if len(sc.Header) > 0 {
return fmt.Errorf("http checks may not set headers in Nomad services")
}
if len(sc.Body) > 0 {
return fmt.Errorf("http checks may not set Body in Nomad services")
}
}
// success_before_passing is consul only
if sc.SuccessBeforePassing != 0 {
return fmt.Errorf("success_before_passing may only be set for Consul service checks")
}
// failures_before_critical is consul only
if sc.FailuresBeforeCritical != 0 {
return fmt.Errorf("failures_before_critical may only be set for Consul service checks")
}
return nil
}
// validate a Service's ServiceCheck in the context of the Consul provider.
func (sc *ServiceCheck) validateConsul() error {
allowable := []string{ServiceCheckGRPC, ServiceCheckTCP, ServiceCheckHTTP, ServiceCheckScript}
if err := sc.validateCommon(allowable); err != nil {
return err
}
checkType := strings.ToLower(sc.Type)
// Note that we cannot completely validate the Expose field yet - we do not
// know whether this ServiceCheck belongs to a connect-enabled group-service.
// Instead, such validation will happen in a job admission controller.
//
// Consul only.
if sc.Expose {
// We can however immediately ensure expose is configured only for HTTP
// and gRPC checks.
@ -292,6 +393,8 @@ func (sc *ServiceCheck) validate() error {
// passFailCheckTypes are intersection of check types supported by both Consul
// and Nomad when using the pass/fail check threshold features.
//
// Consul only.
passFailCheckTypes := []string{"tcp", "http", "grpc"}
if sc.SuccessBeforePassing < 0 {
@ -306,23 +409,7 @@ func (sc *ServiceCheck) validate() error {
return fmt.Errorf("failures_before_critical not supported for check of type %q", sc.Type)
}
// Check that CheckRestart and OnUpdate do not conflict
if sc.CheckRestart != nil {
// CheckRestart and OnUpdate Ignore are incompatible If OnUpdate treats
// an error has healthy, and the deployment succeeds followed by check
// restart restarting erroring checks, the deployment is left in an odd
// state
if sc.OnUpdate == OnUpdateIgnore {
return fmt.Errorf("on_update value %q is not compatible with check_restart", sc.OnUpdate)
}
// CheckRestart IgnoreWarnings must be true if a check has defined OnUpdate
// ignore_warnings
if !sc.CheckRestart.IgnoreWarnings && sc.OnUpdate == OnUpdateIgnoreWarn {
return fmt.Errorf("on_update value %q not supported with check_restart ignore_warnings value %q", sc.OnUpdate, strconv.FormatBool(sc.CheckRestart.IgnoreWarnings))
}
}
return sc.CheckRestart.Validate()
return nil
}
// RequiresPort returns whether the service check requires the task has a port.
@ -401,6 +488,10 @@ func hashIntIfNonZero(h hash.Hash, name string, i int) {
}
}
func hashDuration(h hash.Hash, dur time.Duration) {
_ = binary.Write(h, binary.LittleEndian, dur)
}
func hashHeader(h hash.Hash, m map[string][]string) {
// maintain backwards compatibility for ID stability
// using the %v formatter on a map with string keys produces consistent
@ -621,14 +712,21 @@ func (s *Service) Validate() error {
return mErr.ErrorOrNil()
}
func (s *Service) validateCheckPort(c *ServiceCheck) error {
if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() {
return fmt.Errorf("Check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name)
}
return nil
}
// validateConsulService performs validation on a service which is using the
// consul provider.
func (s *Service) validateConsulService(mErr *multierror.Error) {
// check checks
for _, c := range s.Checks {
if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name))
// validate the check port
if err := s.validateCheckPort(c); err != nil {
mErr.Errors = append(mErr.Errors, err)
continue
}
@ -640,7 +738,8 @@ func (s *Service) validateConsulService(mErr *multierror.Error) {
continue
}
if err := c.validate(); err != nil {
// validate the consul check
if err := c.validateConsul(); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s invalid: %v", c.Name, err))
}
}
@ -662,12 +761,18 @@ func (s *Service) validateConsulService(mErr *multierror.Error) {
// validateNomadService performs validation on a service which is using the
// nomad provider.
func (s *Service) validateNomadService(mErr *multierror.Error) {
// check checks
for _, c := range s.Checks {
// validate the check port
if err := s.validateCheckPort(c); err != nil {
mErr.Errors = append(mErr.Errors, err)
continue
}
// Service blocks for the Nomad provider do not support checks. We perform
// a nil check, as an empty check list is nilled within the service
// canonicalize function.
if s.Checks != nil {
mErr.Errors = append(mErr.Errors, errors.New("Service with provider nomad cannot include Check blocks"))
// validate the nomad check
if err := c.validateNomad(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
}
// Services using the Nomad provider do not support Consul connect.

View File

@ -8,6 +8,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -68,7 +69,7 @@ func TestServiceCheck_validate_PassingTypes(t *testing.T) {
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
SuccessBeforePassing: 3,
}).validate()
}).validateConsul()
require.NoError(t, err)
}
})
@ -81,7 +82,7 @@ func TestServiceCheck_validate_PassingTypes(t *testing.T) {
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
SuccessBeforePassing: 3,
}).validate()
}).validateConsul()
require.EqualError(t, err, `success_before_passing not supported for check of type "script"`)
})
}
@ -98,7 +99,7 @@ func TestServiceCheck_validate_FailingTypes(t *testing.T) {
Interval: 1 * time.Second,
Timeout: 2 * time.Second,
FailuresBeforeCritical: 3,
}).validate()
}).validateConsul()
require.NoError(t, err)
}
})
@ -112,7 +113,7 @@ func TestServiceCheck_validate_FailingTypes(t *testing.T) {
Timeout: 2 * time.Second,
SuccessBeforePassing: 0,
FailuresBeforeCritical: 3,
}).validate()
}).validateConsul()
require.EqualError(t, err, `failures_before_critical not supported for check of type "script"`)
})
}
@ -129,7 +130,7 @@ func TestServiceCheck_validate_PassFailZero_on_scripts(t *testing.T) {
Timeout: 2 * time.Second,
SuccessBeforePassing: 0, // script checks should still pass validation
FailuresBeforeCritical: 0, // script checks should still pass validation
}).validate()
}).validateConsul()
require.NoError(t, err)
})
}
@ -150,7 +151,7 @@ func TestServiceCheck_validate_OnUpdate_CheckRestart_Conflict(t *testing.T) {
Grace: 5 * time.Second,
},
OnUpdate: "ignore_warnings",
}).validate()
}).validateConsul()
require.EqualError(t, err, `on_update value "ignore_warnings" not supported with check_restart ignore_warnings value "false"`)
})
@ -167,7 +168,7 @@ func TestServiceCheck_validate_OnUpdate_CheckRestart_Conflict(t *testing.T) {
Grace: 5 * time.Second,
},
OnUpdate: "ignore",
}).validate()
}).validateConsul()
require.EqualError(t, err, `on_update value "ignore" is not compatible with check_restart`)
})
@ -184,11 +185,129 @@ func TestServiceCheck_validate_OnUpdate_CheckRestart_Conflict(t *testing.T) {
Grace: 5 * time.Second,
},
OnUpdate: "ignore_warnings",
}).validate()
}).validateConsul()
require.NoError(t, err)
})
}
func TestServiceCheck_validateNomad(t *testing.T) {
ci.Parallel(t)
testCases := []struct {
name string
sc *ServiceCheck
exp string
}{
{name: "grpc", sc: &ServiceCheck{Type: ServiceCheckGRPC}, exp: `invalid check type ("grpc"), must be one of tcp, http`},
{name: "script", sc: &ServiceCheck{Type: ServiceCheckScript}, exp: `invalid check type ("script"), must be one of tcp, http`},
{
name: "expose",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
Expose: true, // consul only
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
},
exp: `expose may only be set for Consul service checks`,
}, {
name: "on_update ignore_warnings",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
OnUpdate: "ignore_warnings",
},
exp: `on_update may only be set to ignore_warnings for Consul service checks`,
},
{
name: "success_before_passing",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
SuccessBeforePassing: 3, // consul only
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
},
exp: `success_before_passing may only be set for Consul service checks`,
},
{
name: "failures_before_critical",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
FailuresBeforeCritical: 3, // consul only
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
},
exp: `failures_before_critical may only be set for Consul service checks`,
},
{
name: "check_restart",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
CheckRestart: new(CheckRestart),
},
exp: `check_restart may only be set for Consul service checks`,
},
{
name: "address mode driver",
sc: &ServiceCheck{
Type: ServiceCheckTCP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
AddressMode: "driver",
},
exp: `address_mode = driver may only be set for Consul service checks`,
},
{
name: "http non GET",
sc: &ServiceCheck{
Type: ServiceCheckHTTP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
Path: "/health",
Method: "HEAD",
},
exp: `http checks may only use GET method in Nomad services`,
},
{
name: "http with headers",
sc: &ServiceCheck{
Type: ServiceCheckHTTP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
Path: "/health",
Method: "GET",
Header: map[string][]string{"foo": {"bar"}},
},
exp: `http checks may not set headers in Nomad services`,
},
{
name: "http with body",
sc: &ServiceCheck{
Type: ServiceCheckHTTP,
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
Path: "/health",
Method: "GET",
Body: "blah",
},
exp: `http checks may not set Body in Nomad services`,
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
err := testCase.sc.validateNomad()
if testCase.exp == "" {
must.NoError(t, err)
} else {
must.EqError(t, err, testCase.exp)
}
})
}
}
func TestService_Hash(t *testing.T) {
ci.Parallel(t)
@ -1551,6 +1670,30 @@ func TestService_Validate(t *testing.T) {
expErrStr: "Consul Connect must be exclusively native",
name: "Native Connect with Sidecar",
},
{
input: &Service{
Name: "testservice",
Provider: "nomad",
PortLabel: "port",
Checks: []*ServiceCheck{
{
Name: "servicecheck",
Type: "http",
Path: "/",
Interval: 1 * time.Second,
Timeout: 3 * time.Second,
},
{
Name: "servicecheck",
Type: "tcp",
Interval: 1 * time.Second,
Timeout: 3 * time.Second,
},
},
},
expErr: false,
name: "provider nomad with checks",
},
{
input: &Service{
Name: "testservice",
@ -1558,12 +1701,12 @@ func TestService_Validate(t *testing.T) {
Checks: []*ServiceCheck{
{
Name: "servicecheck",
Type: "script",
},
},
},
expErr: true,
expErrStr: "Service with provider nomad cannot include Check blocks",
name: "provider nomad with checks",
expErr: true,
name: "provider nomad with invalid check type",
},
{
input: &Service{
@ -1602,6 +1745,8 @@ func TestService_Validate(t *testing.T) {
}
func TestService_Validate_Address(t *testing.T) {
ci.Parallel(t)
try := func(mode, advertise string, exp error) {
s := &Service{Name: "s1", Provider: "consul", AddressMode: mode, Address: advertise}
result := s.Validate()
@ -1702,7 +1847,7 @@ func TestService_validateNomadService(t *testing.T) {
Provider: "nomad",
},
inputErr: &multierror.Error{},
expectedOutputErrors: []error{},
expectedOutputErrors: nil,
name: "valid service",
},
{
@ -1711,13 +1856,18 @@ func TestService_validateNomadService(t *testing.T) {
PortLabel: "http",
Namespace: "default",
Provider: "nomad",
Checks: []*ServiceCheck{
{Name: "some-check"},
},
Checks: []*ServiceCheck{{
Name: "webapp",
Type: ServiceCheckHTTP,
Method: "GET",
Path: "/health",
Interval: 3 * time.Second,
Timeout: 1 * time.Second,
}},
},
inputErr: &multierror.Error{},
expectedOutputErrors: []error{errors.New("Service with provider nomad cannot include Check blocks")},
name: "invalid service due to checks",
expectedOutputErrors: nil,
name: "valid service with checks",
},
{
inputService: &Service{
@ -1739,26 +1889,22 @@ func TestService_validateNomadService(t *testing.T) {
PortLabel: "http",
Namespace: "default",
Provider: "nomad",
Connect: &ConsulConnect{
Native: true,
},
Checks: []*ServiceCheck{
{Name: "some-check"},
},
},
inputErr: &multierror.Error{},
expectedOutputErrors: []error{
errors.New("Service with provider nomad cannot include Check blocks"),
errors.New("Service with provider nomad cannot include Connect blocks"),
errors.New(`invalid check type (""), must be one of tcp, http`),
},
name: "invalid service due to checks and connect",
name: "bad nomad check",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.inputService.validateNomadService(tc.inputErr)
require.ElementsMatch(t, tc.expectedOutputErrors, tc.inputErr.Errors)
must.Eq(t, tc.expectedOutputErrors, tc.inputErr.Errors)
})
}
}

View File

@ -25,9 +25,6 @@ import (
"time"
jwt "github.com/golang-jwt/jwt/v4"
"github.com/hashicorp/nomad/helper/escapingfs"
"golang.org/x/crypto/blake2b"
"github.com/hashicorp/cronexpr"
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/go-multierror"
@ -38,12 +35,14 @@ import (
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/args"
"github.com/hashicorp/nomad/helper/constraints/semver"
"github.com/hashicorp/nomad/helper/escapingfs"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/lib/cpuset"
"github.com/hashicorp/nomad/lib/kheap"
psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/miekg/dns"
"github.com/mitchellh/copystructure"
"golang.org/x/crypto/blake2b"
)
var (
@ -4818,7 +4817,7 @@ func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
return njc
}
// TaskGroup summarizes the state of all the allocations of a particular
// TaskGroupSummary summarizes the state of all the allocations of a particular
// TaskGroup
type TaskGroupSummary struct {
Queued int
@ -4909,9 +4908,9 @@ func (u *UpdateStrategy) Copy() *UpdateStrategy {
return nil
}
copy := new(UpdateStrategy)
*copy = *u
return copy
c := new(UpdateStrategy)
*c = *u
return c
}
func (u *UpdateStrategy) Validate() error {
@ -6314,6 +6313,25 @@ func (tg *TaskGroup) Canonicalize(job *Job) {
}
}
// NomadServices returns a list of all group- and task-level services in tg that
// are making use of the nomad service provider.
func (tg *TaskGroup) NomadServices() []*Service {
var services []*Service
for _, service := range tg.Services {
if service.Provider == ServiceProviderNomad {
services = append(services, service)
}
}
for _, task := range tg.Tasks {
for _, service := range task.Services {
if service.Provider == ServiceProviderNomad {
services = append(services, service)
}
}
}
return services
}
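
A hedged usage sketch of the new helper; the group, task, and service names are made up.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	tg := &structs.TaskGroup{
		Name: "group",
		Services: []*structs.Service{
			{Name: "group-svc", Provider: "nomad"},
			{Name: "consul-svc", Provider: "consul"},
		},
		Tasks: []*structs.Task{
			{
				Name: "task",
				Services: []*structs.Service{
					{Name: "task-svc", Provider: "nomad"},
				},
			},
		},
	}
	// only the services using the nomad provider are returned
	for _, s := range tg.NomadServices() {
		fmt.Println(s.Name) // group-svc, task-svc
	}
}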
// Validate is used to check a task group for reasonable configuration
func (tg *TaskGroup) Validate(j *Job) error {
var mErr multierror.Error
@ -7817,7 +7835,7 @@ func (wc *WaitConfig) Validate() error {
return nil
}
// AllocState records a single event that changes the state of the whole allocation
// AllocStateField records a single event that changes the state of the whole allocation
type AllocStateField uint8
const (
@ -10652,6 +10670,12 @@ func (a *AllocNetworkStatus) Copy() *AllocNetworkStatus {
}
}
// NetworkStatus is an interface satisfied by alloc runner, for acquiring the
// network status of an allocation.
type NetworkStatus interface {
NetworkStatus() *AllocNetworkStatus
}
// AllocDeploymentStatus captures the status of the allocation as part of the
// deployment. This can include things like if the allocation has been marked as
// healthy.

View File

@ -1999,7 +1999,7 @@ func TestTask_Validate_Service_Check(t *testing.T) {
Interval: 10 * time.Second,
}
err := invalidCheck.validate()
err := invalidCheck.validateConsul()
if err == nil || !strings.Contains(err.Error(), "Timeout cannot be less") {
t.Fatalf("expected a timeout validation error but received: %q", err)
}
@ -2011,12 +2011,12 @@ func TestTask_Validate_Service_Check(t *testing.T) {
Timeout: 2 * time.Second,
}
if err := check1.validate(); err != nil {
if err := check1.validateConsul(); err != nil {
t.Fatalf("err: %v", err)
}
check1.InitialStatus = "foo"
err = check1.validate()
err = check1.validateConsul()
if err == nil {
t.Fatal("Expected an error")
}
@ -2026,19 +2026,19 @@ func TestTask_Validate_Service_Check(t *testing.T) {
}
check1.InitialStatus = api.HealthCritical
err = check1.validate()
err = check1.validateConsul()
if err != nil {
t.Fatalf("err: %v", err)
}
check1.InitialStatus = api.HealthPassing
err = check1.validate()
err = check1.validateConsul()
if err != nil {
t.Fatalf("err: %v", err)
}
check1.InitialStatus = ""
err = check1.validate()
err = check1.validateConsul()
if err != nil {
t.Fatalf("err: %v", err)
}
@ -2051,22 +2051,22 @@ func TestTask_Validate_Service_Check(t *testing.T) {
Path: "/foo/bar",
}
err = check2.validate()
err = check2.validateConsul()
if err != nil {
t.Fatalf("err: %v", err)
}
check2.Path = ""
err = check2.validate()
err = check2.validateConsul()
if err == nil {
t.Fatal("Expected an error")
}
if !strings.Contains(err.Error(), "valid http path") {
if !strings.Contains(err.Error(), "http type must have http path") {
t.Fatalf("err: %v", err)
}
check2.Path = "http://www.example.com"
err = check2.validate()
err = check2.validateConsul()
if err == nil {
t.Fatal("Expected an error")
}
@ -2082,7 +2082,7 @@ func TestTask_Validate_Service_Check(t *testing.T) {
Timeout: 1 * time.Second,
Path: "/health",
Expose: true,
}).validate())
}).validateConsul())
})
t.Run("type tcp", func(t *testing.T) {
require.EqualError(t, (&ServiceCheck{
@ -2090,7 +2090,7 @@ func TestTask_Validate_Service_Check(t *testing.T) {
Interval: 1 * time.Second,
Timeout: 1 * time.Second,
Expose: true,
}).validate(), "expose may only be set on HTTP or gRPC checks")
}).validateConsul(), "expose may only be set on HTTP or gRPC checks")
})
})
}