Merge pull request #8977 from hashicorp/b-script-check-task

consul: fix validation of task in group-level script-checks
This commit is contained in:
Seth Hoenig 2020-09-29 09:29:02 -05:00 committed by GitHub
commit 6dd862b927
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 141 additions and 3 deletions

View file

@ -8,6 +8,7 @@ IMPROVEMENTS:
BUG FIXES:
* core: Fixed a bug where blocking queries would not include the query's maximum wait time when calculating whether it was safe to retry. [[GH-8921](https://github.com/hashicorp/nomad/issues/8921)]
* consul: Fixed a bug to correctly validate task when using script-checks in group-level services [[GH-8952](https://github.com/hashicorp/nomad/issues/8952)]
## 0.12.5 (September 17, 2020)

View file

@ -204,6 +204,10 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck {
// for them. The group-level service and any check restart behaviors it
// needs are entirely encapsulated within the group service hook which
// watches Consul for status changes.
//
// The script check is associated with a group task if the service.task or
// service.check.task matches the task name. The service.check.task takes
// precedence.
tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
interpolatedGroupServices := taskenv.InterpolateServices(h.taskEnv, tg.Services)
for _, service := range interpolatedGroupServices {
@ -211,7 +215,7 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck {
if check.Type != structs.ServiceCheckScript {
continue
}
if check.TaskName != h.task.Name {
if !h.associated(h.task.Name, service.TaskName, check.TaskName) {
continue
}
groupTaskName := "group-" + tg.Name
@ -237,6 +241,20 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck {
return scriptChecks
}
// associated returns true if the script check is associated with the task. This
// would be the case if the check.task is the same as task, or if the service.task
// is the same as the task _and_ check.task is not configured (i.e. the check
// inherits the task of the service).
func (*scriptCheckHook) associated(task, serviceTask, checkTask string) bool {
if checkTask == task {
return true
}
if serviceTask == task && checkTask == "" {
return true
}
return false
}
// heartbeater is the subset of consul agent functionality needed by script
// checks to heartbeat
type heartbeater interface {
@ -371,7 +389,7 @@ const (
updateTTLBackoffLimit = 3 * time.Second
)
// updateTTL updates the state to Consul, performing an expontential backoff
// updateTTL updates the state to Consul, performing an exponential backoff
// in the case where the check isn't registered in Consul to avoid a race between
// service registration and the first check.
func (s *scriptCheck) updateTTL(ctx context.Context, msg, state string) error {

View file

@ -286,3 +286,27 @@ func TestScript_TaskEnvInterpolation(t *testing.T) {
require.True(t, ok)
require.Equal(t, "my-job-backend-check", check.check.Name)
}
func TestScript_associated(t *testing.T) {
t.Run("neither set", func(t *testing.T) {
require.False(t, new(scriptCheckHook).associated("task1", "", ""))
})
t.Run("service set", func(t *testing.T) {
require.True(t, new(scriptCheckHook).associated("task1", "task1", ""))
require.False(t, new(scriptCheckHook).associated("task1", "task2", ""))
})
t.Run("check set", func(t *testing.T) {
require.True(t, new(scriptCheckHook).associated("task1", "", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "", "task2"))
})
t.Run("both set", func(t *testing.T) {
// ensure check.task takes precedence over service.task
require.True(t, new(scriptCheckHook).associated("task1", "task1", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "task1", "task2"))
require.True(t, new(scriptCheckHook).associated("task1", "task2", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "task2", "task2"))
})
}

View file

@ -478,6 +478,7 @@ func TestJobEndpoint_Register_ConnectExposeCheck(t *testing.T) {
Name: "check2",
Type: "script",
Command: "/bin/true",
TaskName: "web",
Interval: 1 * time.Second,
Timeout: 1 * time.Second,
}, {

View file

@ -5801,6 +5801,12 @@ func (tg *TaskGroup) Validate(j *Job) error {
mErr.Errors = append(mErr.Errors, outer)
}
// Validate group service script-checks
if err := tg.validateScriptChecksInGroupServices(); err != nil {
outer := fmt.Errorf("Task group service check validation failed: %v", err)
mErr.Errors = append(mErr.Errors, outer)
}
// Validate the scaling policy
if err := tg.validateScalingPolicy(j); err != nil {
outer := fmt.Errorf("Task group scaling policy validation failed: %v", err)
@ -5950,6 +5956,26 @@ func (tg *TaskGroup) validateServices() error {
return mErr.ErrorOrNil()
}
// validateScriptChecksInGroupServices ensures group-level services with script
// checks know what task driver to use. Either the service.task or service.check.task
// parameter must be configured.
func (tg *TaskGroup) validateScriptChecksInGroupServices() error {
var mErr multierror.Error
for _, service := range tg.Services {
if service.TaskName == "" {
for _, check := range service.Checks {
if check.Type == "script" && check.TaskName == "" {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter",
tg.Name, service.Name, check.Name,
))
}
}
}
}
return mErr.ErrorOrNil()
}
// validateScalingPolicy ensures that the scaling policy has consistent
// min and max, not in conflict with the task group count
func (tg *TaskGroup) validateScalingPolicy(j *Job) error {

View file

@ -5584,3 +5584,69 @@ func TestAllocatedSharedResources_Canonicalize(t *testing.T) {
},
}, a.Ports)
}
func TestTaskGroup_validateScriptChecksInGroupServices(t *testing.T) {
t.Run("service task not set", func(t *testing.T) {
tg := &TaskGroup{
Name: "group1",
Services: []*Service{{
Name: "service1",
TaskName: "", // unset
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
TaskName: "", // unset
}, {
Name: "check2",
Type: "ttl", // not script
}, {
Name: "check3",
Type: "script",
TaskName: "", // unset
}},
}, {
Name: "service2",
Checks: []*ServiceCheck{{
Type: "script",
TaskName: "task1", // set
}},
}, {
Name: "service3",
TaskName: "", // unset
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
TaskName: "", // unset
}},
}},
}
errStr := tg.validateScriptChecksInGroupServices().Error()
require.Contains(t, errStr, "Service [group1]->service1 or Check check1 must specify task parameter")
require.Contains(t, errStr, "Service [group1]->service1 or Check check3 must specify task parameter")
require.Contains(t, errStr, "Service [group1]->service3 or Check check1 must specify task parameter")
})
t.Run("service task set", func(t *testing.T) {
tgOK := &TaskGroup{
Name: "group1",
Services: []*Service{{
Name: "service1",
TaskName: "task1",
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
}, {
Name: "check2",
Type: "ttl",
}, {
Name: "check3",
Type: "script",
}},
}},
}
mErrOK := tgOK.validateScriptChecksInGroupServices()
require.Nil(t, mErrOK)
})
}

View file

@ -246,7 +246,8 @@ scripts.
- `task` `(string: <required>)` - Specifies the task associated with this
check. Scripts are executed within the task's environment, and
`check_restart` stanzas will apply to the specified task. For `checks` on group
level `services` only.
level `services` only. Inherits the [`service.task`][service_task] value if not
set.
- `timeout` `(string: <required>)` - Specifies how long Consul will wait for a
health check query to succeed. This is specified using a label suffix like
@ -714,3 +715,4 @@ advertise and check directly since Nomad isn't managing any port assignments.
[shutdowndelay]: /docs/job-specification/task#shutdown_delay
[killsignal]: /docs/job-specification/task#kill_signal
[killtimeout]: /docs/job-specification/task#kill_timeout
[service_task]: /docs/job-specification/service#task-1