From 606e3ebdd435fe7d78f9477db9db9d1f5ee960cf Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 21 Jul 2022 09:54:27 -0500 Subject: [PATCH] client: updates from pr feedback --- .changelog/13715.txt | 3 +++ client/allochealth/tracker.go | 7 ++----- client/allocrunner/checks_hook.go | 2 +- client/serviceregistration/checks/checkstore/shim.go | 7 ++++++- nomad/structs/checks.go | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 .changelog/13715.txt diff --git a/.changelog/13715.txt b/.changelog/13715.txt new file mode 100644 index 000000000..af9589e2c --- /dev/null +++ b/.changelog/13715.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: Add built-in support for checks on nomad services +``` diff --git a/client/allochealth/tracker.go b/client/allochealth/tracker.go index 7790bc8fa..87820d9ed 100644 --- a/client/allochealth/tracker.go +++ b/client/allochealth/tracker.go @@ -366,7 +366,7 @@ func (t *Tracker) watchTaskEvents() { // reset task health t.setTaskHealth(false, false) - // Avoid the timer from firing at the old start time + // Prevent the timer from firing at the old start time waiter.disable() // Set the timer since all tasks are started @@ -492,9 +492,6 @@ OUTER: // Store the task registrations t.lock.Lock() for task, reg := range allocReg.Tasks { - //TODO(schmichael) for now skip unknown tasks as - //they're task group services which don't currently - //support checks anyway if v, ok := t.taskHealth[task]; ok { v.taskRegistrations = reg } @@ -571,7 +568,7 @@ func (t *Tracker) watchNomadEvents() { for { select { - // we are shutting down + // tracker has been canceled, so stop waiting case <-t.ctx.Done(): return diff --git a/client/allocrunner/checks_hook.go b/client/allocrunner/checks_hook.go index 19ea439e7..1705cc4cb 100644 --- a/client/allocrunner/checks_hook.go +++ b/client/allocrunner/checks_hook.go @@ -55,7 +55,7 @@ func (o *observer) start() { query := checks.GetCheckQuery(o.check) result := o.checker.Do(o.ctx, o.qc, query) - // and put the results into the store + // and put the results into the store (already logged) _ = o.checkStore.Set(o.allocID, result) // setup timer for next interval diff --git a/client/serviceregistration/checks/checkstore/shim.go b/client/serviceregistration/checks/checkstore/shim.go index f83ff512e..3a67381a4 100644 --- a/client/serviceregistration/checks/checkstore/shim.go +++ b/client/serviceregistration/checks/checkstore/shim.go @@ -57,6 +57,8 @@ func (s *shim) restore() { results, err := s.db.GetCheckResults() if err != nil { s.log.Error("failed to restore health check results", "error", err) + // may as well continue and let the check observers repopulate - maybe + // the persistent storage error was transitory return } @@ -90,7 +92,10 @@ func (s *shim) Set(allocID string, qr *structs.CheckQueryResult) error { // on Client restart restored check results may be outdated but the status // is the same as the most recent result if !exists || previous.Status != qr.Status { - return s.db.PutCheckResult(allocID, qr) + if err := s.db.PutCheckResult(allocID, qr); err != nil { + s.log.Error("failed to set check status", "alloc_id", allocID, "check_id", qr.ID, "error", err) + return err + } } return nil diff --git a/nomad/structs/checks.go b/nomad/structs/checks.go index 35257d5bf..23ff5a093 100644 --- a/nomad/structs/checks.go +++ b/nomad/structs/checks.go @@ -15,7 +15,7 @@ const ( // will not move forward while the check is failing. Healthiness CheckMode = "healthiness" - // A Readiness check is useful in the context of ensuring a service is + // A Readiness check is useful in the context of ensuring a service // should be performing its duties (regardless of healthiness). This is an // indicator that the check's on_update configuration is set to "ignore", // implying that Deployments will move forward regardless if the check is