From a180c00fc3be1a2cbc6082637a5d160731fc1a37 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Sun, 10 Sep 2017 17:00:25 -0700 Subject: [PATCH] on_warning=false -> ignore_warnings=false Treat warnings as unhealthy by default --- api/tasks.go | 6 ++-- command/agent/consul/check_watcher.go | 42 ++++++++++++++------------- command/agent/job_endpoint.go | 12 ++++---- jobspec/parse.go | 2 +- nomad/structs/structs.go | 10 +++---- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 01f5b96d4..3e0bc40af 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -82,9 +82,9 @@ func (r *RestartPolicy) Merge(rp *RestartPolicy) { // CheckRestart describes if and when a task should be restarted based on // failing health checks. type CheckRestart struct { - Limit int `mapstructure:"limit"` - Grace time.Duration `mapstructure:"grace_period"` - OnWarning bool `mapstructure:"on_warning"` + Limit int `mapstructure:"limit"` + Grace time.Duration `mapstructure:"grace_period"` + IgnoreWarnings bool `mapstructure:"ignore_warnings"` } // The ServiceCheck data model represents the consul health check that diff --git a/command/agent/consul/check_watcher.go b/command/agent/consul/check_watcher.go index feefbd520..6261c7d80 100644 --- a/command/agent/consul/check_watcher.go +++ b/command/agent/consul/check_watcher.go @@ -34,12 +34,12 @@ type checkRestart struct { // remove this checkID (if true only checkID will be set) remove bool - task TaskRestarter - restartDelay time.Duration - grace time.Duration - interval time.Duration - timeLimit time.Duration - warning bool + task TaskRestarter + restartDelay time.Duration + grace time.Duration + interval time.Duration + timeLimit time.Duration + ignoreWarnings bool // Mutable fields @@ -61,8 +61,8 @@ func (c *checkRestart) update(now time.Time, status string) { switch status { case api.HealthCritical: case api.HealthWarning: - if !c.warning { - // Warnings are ok, reset state and exit + if c.ignoreWarnings 
{ + // Warnings are ignored, reset state and exit c.unhealthyStart = time.Time{} return } @@ -79,6 +79,8 @@ func (c *checkRestart) update(now time.Time, status string) { if c.unhealthyStart.IsZero() { // First failure, set restart deadline + c.logger.Printf("[DEBUG] consul.health: alloc %q task %q check %q became unhealthy. Restarting in %s if not healthy", + c.allocID, c.taskName, c.checkName, c.timeLimit) c.unhealthyStart = now } @@ -224,18 +226,18 @@ func (w *checkWatcher) Watch(allocID, taskName, checkID string, check *structs.S } c := checkRestart{ - allocID: allocID, - taskName: taskName, - checkID: checkID, - checkName: check.Name, - task: restarter, - restartDelay: restarter.RestartDelay(), - interval: check.Interval, - grace: check.CheckRestart.Grace, - graceUntil: time.Now().Add(check.CheckRestart.Grace), - timeLimit: check.Interval * time.Duration(check.CheckRestart.Limit-1), - warning: check.CheckRestart.OnWarning, - logger: w.logger, + allocID: allocID, + taskName: taskName, + checkID: checkID, + checkName: check.Name, + task: restarter, + restartDelay: restarter.RestartDelay(), + interval: check.Interval, + grace: check.CheckRestart.Grace, + graceUntil: time.Now().Add(check.CheckRestart.Grace), + timeLimit: check.Interval * time.Duration(check.CheckRestart.Limit-1), + ignoreWarnings: check.CheckRestart.IgnoreWarnings, + logger: w.logger, } select { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index a3efb7d18..8bfc8a189 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -687,9 +687,9 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) { } if service.CheckRestart != nil { structsTask.Services[i].CheckRestart = &structs.CheckRestart{ - Limit: service.CheckRestart.Limit, - Grace: service.CheckRestart.Grace, - OnWarning: service.CheckRestart.OnWarning, + Limit: service.CheckRestart.Limit, + Grace: service.CheckRestart.Grace, + IgnoreWarnings: 
service.CheckRestart.IgnoreWarnings, } } @@ -713,9 +713,9 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) { } if check.CheckRestart != nil { structsTask.Services[i].Checks[j].CheckRestart = &structs.CheckRestart{ - Limit: check.CheckRestart.Limit, - Grace: check.CheckRestart.Grace, - OnWarning: check.CheckRestart.OnWarning, + Limit: check.CheckRestart.Limit, + Grace: check.CheckRestart.Grace, + IgnoreWarnings: check.CheckRestart.IgnoreWarnings, } } } diff --git a/jobspec/parse.go b/jobspec/parse.go index 1c90963ae..61e5b9968 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -1063,7 +1063,7 @@ func parseCheckRestart(cro *ast.ObjectItem) (*api.CheckRestart, error) { valid := []string{ "limit", "grace_period", - "on_warning", + "ignore_warnings", } if err := checkHCLKeys(cro.Val, valid); err != nil { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a598d0b14..91c3a2b76 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2760,9 +2760,9 @@ func (tg *TaskGroup) GoString() string { // CheckRestart describes if and when a task should be restarted based on // failing health checks. type CheckRestart struct { - Limit int // Restart task after this many unhealthy intervals - Grace time.Duration // Grace time to give tasks after starting to get healthy - OnWarning bool // If true treat checks in `warning` as unhealthy + Limit int // Restart task after this many unhealthy intervals + Grace time.Duration // Grace time to give tasks after starting to get healthy + IgnoreWarnings bool // If true treat checks in `warning` as passing } func (c *CheckRestart) Copy() *CheckRestart { @@ -2798,8 +2798,8 @@ func (c *CheckRestart) Merge(o *CheckRestart) *CheckRestart { nc.Grace = o.Grace } - if !nc.OnWarning { - nc.OnWarning = o.OnWarning + if !nc.IgnoreWarnings { + nc.IgnoreWarnings = o.IgnoreWarnings } return nc