open-nomad/client/allocrunner/taskrunner/tasklet.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package taskrunner

import (
	"context"
	"time"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
)

// contextExec wraps an interfaces.ScriptExecutor so that callers can stop
// waiting on an Exec call when a context is canceled or a timeout elapses.
// The wrapped call itself is not interrupted by this type; it keeps running
// in its own goroutine until the underlying executor returns.
type contextExec struct {
	// pctx is the parent context. A subcontext will be created with Exec's
	// timeout.
	pctx context.Context

	// exec is the underlying executor that Exec delegates to
	exec interfaces.ScriptExecutor
}

// newContextExec builds a contextExec that delegates to exec and treats ctx
// as the parent context of every Exec call.
func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {
	wrapped := &contextExec{exec: exec, pctx: ctx}
	return wrapped
}

// execResult bundles the three values returned by a single Exec call so they
// can be sent over a channel or handed to a callback as one value.
type execResult struct {
	output []byte // bytes returned by Exec
	code   int    // int returned by Exec (presumably the exit code; confirm)
	err    error  // error returned by Exec, or the context's error
}

// Exec runs cmd with args on the wrapped executor and returns as soon as one
// of the following happens: the wrapped Exec returns, timeout elapses, or the
// parent context is canceled. In the latter two cases the context's error is
// returned with a nil output and a zero code.
func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {
	// Enforce the timeout ourselves instead of relying on the wrapped
	// executor to honor it.
	execCtx, stop := context.WithTimeout(c.pctx, timeout)
	defer stop()

	// A capacity of one lets the worker hand off its result even when no
	// receiver is waiting.
	done := make(chan execResult, 1)

	go func() {
		var r execResult
		r.output, r.code, r.err = c.exec.Exec(timeout, cmd, args)
		select {
		case <-execCtx.Done():
		case done <- r:
		}
	}()

	select {
	case <-execCtx.Done():
		return nil, 0, execCtx.Err()
	case r := <-done:
		return r.output, r.code, r.err
	}
}

// tasklet is an abstraction around periodically running a script within
// the context of a Task. The interfaces.ScriptExecutor is fired at least
// once and on each interval, and fires a callback whenever the script
// is complete.
type tasklet struct {
	Command    string                    // Command is the command to run for tasklet
	Args       []string                  // Args is a list of arguments for tasklet
	Interval   time.Duration             // Interval of the tasklet
	Timeout    time.Duration             // Timeout of the tasklet
	exec       interfaces.ScriptExecutor // exec runs Command with Args
	callback   taskletCallback           // callback receives each run's result
	logger     log.Logger                // logger for trace and timeout messages
	shutdownCh <-chan struct{}           // shutdownCh triggers one last run when closed
}

// taskletHandle is the value returned by tasklet.run. It lets the caller
// cancel a running tasklet and wait for it to shut down.
type taskletHandle struct {
	// cancel stops the tasklet
	cancel func()

	// exitCh is closed once the tasklet has exited
	exitCh chan struct{}
}

// wait exposes a receive-only view of the channel that is closed when the
// tasklet exits.
func (h taskletHandle) wait() <-chan struct{} {
	return h.exitCh
}

// taskletCallback is the function a tasklet invokes after a run of its
// script. It receives the tasklet's cancellation context and the execResult
// produced by that run.
type taskletCallback func(context.Context, execResult)

// run starts the tasklet in a background goroutine and returns a handle for
// cancelling it and waiting for it to exit. The script is executed as soon as
// the goroutine starts and again each time Interval elapses; the tasklet's
// callback is invoked with the result of every execution that is not
// interrupted by cancellation. If the shutdownCh is closed the script is run
// once more before the goroutine exits.
func (t *tasklet) run() *taskletHandle {
	ctx, cancel := context.WithCancel(context.Background())
	exitCh := make(chan struct{})

	// Wrap the original interfaces.ScriptExecutor in one that obeys context
	// cancelation.
	ctxExec := newContextExec(ctx, t.exec)

	go func() {
		defer close(exitCh)

		// A zero-duration timer fires right away, so the first run does not
		// wait for a full Interval.
		timer := time.NewTimer(0)
		defer timer.Stop()
		for {
			// Block until tasklet is removed, Nomad is shutting
			// down, or the tasklet interval is up
			select {
			case <-ctx.Done():
				// tasklet has been removed
				return
			case <-t.shutdownCh:
				// unblock but don't exit until after we run once more
			case <-timer.C:
				// Re-arm before executing so the next run is scheduled
				// relative to the start of this one.
				timer.Reset(t.Interval)
			}

			metrics.IncrCounter([]string{
				"client", "allocrunner", "taskrunner", "tasklet_runs"}, 1)

			// Execute check script with timeout
			t.logger.Trace("tasklet executing")
			output, code, err := ctxExec.Exec(t.Timeout, t.Command, t.Args)
			switch err {
			case context.Canceled:
				// check removed during execution; exit
				return
			case context.DeadlineExceeded:
				metrics.IncrCounter([]string{
					"client", "allocrunner", "taskrunner",
					"tasklet_timeouts"}, 1)

				// Log deadline exceeded every time as it's a
				// distinct issue from the tasklet returning failure.
				// err is already non-nil here and is passed through to
				// the callback unchanged.
				t.logger.Warn("tasklet timed out", "timeout", t.Timeout)
			}

			t.callback(ctx, execResult{output, code, err})

			select {
			case <-t.shutdownCh:
				// We've been told to exit and just ran so exit
				return
			default:
			}
		}
	}()
	return &taskletHandle{cancel: cancel, exitCh: exitCh}
}
[COMPLIANCE] Add Copyright and License Headers 2023-04-10 15:36:59 +00:00			`// Copyright (c) HashiCorp, Inc.`
			`// SPDX-License-Identifier: MPL-2.0`

support script checks for task group services (#6197) In Nomad prior to Consul Connect, all Consul checks work the same except for Script checks. Because the Task being checked is running in its own container namespaces, the check is executed by Nomad in the Task's context. If the Script check passes, Nomad uses the TTL check feature of Consul to update the check status. This means in order to run a Script check, we need to know what Task to execute it in. To support Consul Connect, we need Group Services, and these need to be registered in Consul along with their checks. We could push the Service down into the Task, but this doesn't work if someone wants to associate a service with a task's ports, but do script checks in another task in the allocation. Because Nomad is handling the Script check and not Consul anyways, this moves the script check handling into the task runner so that the task runner can own the script check's configuration and lifecycle. This will allow us to pass the group service check configuration down into a task without associating the service itself with the task. When tasks are checked for script checks, we walk back through their task group to see if there are script checks associated with the task. If so, we'll spin off script check tasklets for them. The group-level service and any restart behaviors it needs are entirely encapsulated within the group service hook. 2019-09-03 19:09:04 +00:00			`package taskrunner`

			`import (`
			`"context"`
			`"time"`

			`metrics "github.com/armon/go-metrics"`
			`log "github.com/hashicorp/go-hclog"`

			`"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"`
			`)`

			`// contextExec allows canceling a interfaces.ScriptExecutor with a context.`
			`type contextExec struct {`
			`// pctx is the parent context. A subcontext will be created with Exec's`
			`// timeout.`
			`pctx context.Context`

			`// exec to be wrapped in a context`
			`exec interfaces.ScriptExecutor`
			`}`

			`func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {`
			`return &contextExec{`
			`pctx: ctx,`
			`exec: exec,`
			`}`
			`}`

			`// execResult are the outputs of an Exec`
			`type execResult struct {`
			`output []byte`
			`code int`
			`err error`
			`}`

			`// Exec a command until the timeout expires, the context is canceled, or the`
			`// underlying Exec returns.`
			`func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {`
			`resCh := make(chan execResult, 1)`

			`// Don't trust the underlying implementation to obey timeout`
			`ctx, cancel := context.WithTimeout(c.pctx, timeout)`
			`defer cancel()`

			`go func() {`
			`output, code, err := c.exec.Exec(timeout, cmd, args)`
			`select {`
			`case resCh <- execResult{output, code, err}:`
			`case <-ctx.Done():`
			`}`
			`}()`

			`select {`
			`case res := <-resCh:`
			`return res.output, res.code, res.err`
			`case <-ctx.Done():`
			`return nil, 0, ctx.Err()`
			`}`
			`}`

			`// tasklet is an abstraction around periodically running a script within`
			`// the context of a Task. The interfaces.ScriptExecutor is fired at least`
			`// once and on each interval, and fires a callback whenever the script`
			`// is complete.`
			`type tasklet struct {`
			`Command string // Command is the command to run for tasklet`
			`Args []string // Args is a list of arguments for tasklet`
			`Interval time.Duration // Interval of the tasklet`
			`Timeout time.Duration // Timeout of the tasklet`
			`exec interfaces.ScriptExecutor`
			`callback taskletCallback`
			`logger log.Logger`
			`shutdownCh <-chan struct{}`
			`}`

			`// taskletHandle is returned by tasklet.run by cancelling a tasklet and`
			`// waiting for it to shutdown.`
			`type taskletHandle struct {`
			`// cancel the script`
			`cancel func()`
			`exitCh chan struct{}`
			`}`

			`// wait returns a chan that's closed when the tasklet exits`
			`func (t taskletHandle) wait() <-chan struct{} {`
			`return t.exitCh`
			`}`

			`// taskletCallback is called with a cancellation context and the output of a`
			`// tasklet's Exec whenever it runs.`
			`type taskletCallback func(context.Context, execResult)`

			`// run this tasklet check and return its cancel func. The tasklet's`
			`// callback will be called each time it completes. If the shutdownCh is`
			`// closed the check will be run once more before exiting.`
			`func (t tasklet) run() taskletHandle {`
			`ctx, cancel := context.WithCancel(context.Background())`
			`exitCh := make(chan struct{})`

			`// Wrap the original interfaces.ScriptExecutor in one that obeys context`
			`// cancelation.`
			`ctxExec := newContextExec(ctx, t.exec)`

			`go func() {`
			`defer close(exitCh)`
			`timer := time.NewTimer(0)`
			`defer timer.Stop()`
			`for {`
			`// Block until tasklet is removed, Nomad is shutting`
			`// down, or the tasklet interval is up`
			`select {`
			`case <-ctx.Done():`
			`// tasklet has been removed`
			`return`
			`case <-t.shutdownCh:`
			`// unblock but don't exit until after we run once more`
			`case <-timer.C:`
			`timer.Reset(t.Interval)`
			`}`

			`metrics.IncrCounter([]string{`
			`"client", "allocrunner", "taskrunner", "tasklet_runs"}, 1)`

			`// Execute check script with timeout`
			`t.logger.Trace("tasklet executing")`
			`output, code, err := ctxExec.Exec(t.Timeout, t.Command, t.Args)`
			`switch err {`
			`case context.Canceled:`
			`// check removed during execution; exit`
			`return`
			`case context.DeadlineExceeded:`
			`metrics.IncrCounter([]string{`
			`"client", "allocrunner", "taskrunner",`
			`"tasklet_timeouts"}, 1)`
			`// If no error was returned, set one to make sure the tasklet`
			`// is marked as failed`
			`if err == nil {`
			`err = context.DeadlineExceeded`
			`}`

			`// Log deadline exceeded every time as it's a`
			`// distinct issue from the tasklet returning failure`
			`t.logger.Warn("tasklet timed out", "timeout", t.Timeout)`
			`}`

			`t.callback(ctx, execResult{output, code, err})`

			`select {`
			`case <-t.shutdownCh:`
			`// We've been told to exit and just ran so exit`
			`return`
			`default:`
			`}`
			`}`
			`}()`
			`return &taskletHandle{cancel: cancel, exitCh: exitCh}`
			`}`