open-nomad/client/allocrunner/taskrunner/template/template.go

725 lines
20 KiB
Go
Raw Normal View History

2018-07-13 20:45:57 +00:00
package template
import (
"context"
2018-07-13 20:45:57 +00:00
"fmt"
"math/rand"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
ctconf "github.com/hashicorp/consul-template/config"
"github.com/hashicorp/consul-template/manager"
"github.com/hashicorp/consul-template/signals"
envparse "github.com/hashicorp/go-envparse"
multierror "github.com/hashicorp/go-multierror"
2018-10-04 23:22:01 +00:00
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
2018-07-13 20:45:57 +00:00
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/taskenv"
2018-07-13 20:45:57 +00:00
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// consulTemplateSourceName is the source name when using the TaskHooks.
consulTemplateSourceName = "Template"
// missingDepEventLimit is the number of missing dependencies that will be
// logged before we switch to showing just the number of missing
// dependencies.
missingDepEventLimit = 3
// DefaultMaxTemplateEventRate is the default maximum rate at which a
// template event should be fired.
DefaultMaxTemplateEventRate = 3 * time.Second
)
// TaskTemplateManager is used to run a set of templates for a given task
type TaskTemplateManager struct {
// config holds the template managers configuration
config *TaskTemplateManagerConfig
// lookup allows looking up the set of Nomad templates by their consul-template ID
lookup map[string][]*structs.Template
// runner is the consul-template runner
runner *manager.Runner
// signals is a lookup map from the string representation of a signal to its
// actual signal
signals map[string]os.Signal
// shutdownCh is used to signal and started goroutine to shutdown
shutdownCh chan struct{}
// shutdown marks whether the manager has been shutdown
shutdown bool
shutdownLock sync.Mutex
}
// TaskTemplateManagerConfig is used to configure an instance of the
// TaskTemplateManager
type TaskTemplateManagerConfig struct {
// UnblockCh is closed when the template has been rendered
UnblockCh chan struct{}
// Lifecycle is used to interact with the task the template manager is being
// run for
Lifecycle interfaces.TaskLifecycle
// Events is used to emit events for the task
Events interfaces.EventEmitter
// Templates is the set of templates we are managing
Templates []*structs.Template
// ClientConfig is the Nomad Client configuration
ClientConfig *config.Config
// VaultToken is the Vault token for the task.
VaultToken string
// VaultNamespace is the Vault namespace for the task
VaultNamespace string
2018-07-13 20:45:57 +00:00
// TaskDir is the task's directory
TaskDir string
// EnvBuilder is the environment variable builder for the task.
EnvBuilder *taskenv.Builder
2018-07-13 20:45:57 +00:00
// MaxTemplateEventRate is the maximum rate at which we should emit events.
MaxTemplateEventRate time.Duration
// retryRate is only used for testing and is used to increase the retry rate
retryRate time.Duration
}
// Validate validates the configuration.
func (c *TaskTemplateManagerConfig) Validate() error {
if c == nil {
return fmt.Errorf("Nil config passed")
} else if c.UnblockCh == nil {
return fmt.Errorf("Invalid unblock channel given")
} else if c.Lifecycle == nil {
return fmt.Errorf("Invalid lifecycle hooks given")
} else if c.Events == nil {
return fmt.Errorf("Invalid event hook given")
} else if c.ClientConfig == nil {
return fmt.Errorf("Invalid client config given")
} else if c.TaskDir == "" {
return fmt.Errorf("Invalid task directory given: %q", c.TaskDir)
} else if c.EnvBuilder == nil {
return fmt.Errorf("Invalid task environment given")
} else if c.MaxTemplateEventRate == 0 {
return fmt.Errorf("Invalid max template event rate given")
}
return nil
}
func NewTaskTemplateManager(config *TaskTemplateManagerConfig) (*TaskTemplateManager, error) {
// Check pre-conditions
if err := config.Validate(); err != nil {
return nil, err
}
tm := &TaskTemplateManager{
config: config,
shutdownCh: make(chan struct{}),
}
// Parse the signals that we need
for _, tmpl := range config.Templates {
if tmpl.ChangeSignal == "" {
continue
}
sig, err := signals.Parse(tmpl.ChangeSignal)
if err != nil {
return nil, fmt.Errorf("Failed to parse signal %q", tmpl.ChangeSignal)
}
if tm.signals == nil {
tm.signals = make(map[string]os.Signal)
}
tm.signals[tmpl.ChangeSignal] = sig
}
// Build the consul-template runner
runner, lookup, err := templateRunner(config)
if err != nil {
return nil, err
}
tm.runner = runner
tm.lookup = lookup
go tm.run()
return tm, nil
}
// Stop is used to stop the consul-template runner
func (tm *TaskTemplateManager) Stop() {
tm.shutdownLock.Lock()
defer tm.shutdownLock.Unlock()
if tm.shutdown {
return
}
close(tm.shutdownCh)
tm.shutdown = true
// Stop the consul-template runner
if tm.runner != nil {
tm.runner.Stop()
}
}
// run is the long lived loop that handles errors and templates being rendered
func (tm *TaskTemplateManager) run() {
// Runner is nil if there is no templates
if tm.runner == nil {
// Unblock the start if there is nothing to do
close(tm.config.UnblockCh)
return
}
// Start the runner
go tm.runner.Start()
// Block till all the templates have been rendered
tm.handleFirstRender()
// Detect if there was a shutdown.
select {
case <-tm.shutdownCh:
return
default:
}
// Read environment variables from env templates before we unblock
envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build())
2018-07-13 20:45:57 +00:00
if err != nil {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err)))
2018-07-13 20:45:57 +00:00
return
}
tm.config.EnvBuilder.SetTemplateEnv(envMap)
// Unblock the task
close(tm.config.UnblockCh)
// If all our templates are change mode no-op, then we can exit here
if tm.allTemplatesNoop() {
return
}
// handle all subsequent render events.
tm.handleTemplateRerenders(time.Now())
}
// handleFirstRender blocks till all templates have been rendered
func (tm *TaskTemplateManager) handleFirstRender() {
// missingDependencies is the set of missing dependencies.
var missingDependencies map[string]struct{}
// eventTimer is used to trigger the firing of an event showing the missing
// dependencies.
eventTimer := time.NewTimer(tm.config.MaxTemplateEventRate)
if !eventTimer.Stop() {
<-eventTimer.C
}
// outstandingEvent tracks whether there is an outstanding event that should
// be fired.
outstandingEvent := false
// Wait till all the templates have been rendered
WAIT:
for {
select {
case <-tm.shutdownCh:
return
case err, ok := <-tm.runner.ErrCh:
if !ok {
continue
}
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed: %v", err)))
2018-07-13 20:45:57 +00:00
case <-tm.runner.TemplateRenderedCh():
// A template has been rendered, figure out what to do
events := tm.runner.RenderEvents()
// Not all templates have been rendered yet
if len(events) < len(tm.lookup) {
continue
}
for _, event := range events {
// This template hasn't been rendered
if event.LastWouldRender.IsZero() {
continue WAIT
}
}
break WAIT
case <-tm.runner.RenderEventCh():
events := tm.runner.RenderEvents()
joinedSet := make(map[string]struct{})
for _, event := range events {
missing := event.MissingDeps
if missing == nil {
continue
}
for _, dep := range missing.List() {
joinedSet[dep.String()] = struct{}{}
}
}
// Check to see if the new joined set is the same as the old
different := len(joinedSet) != len(missingDependencies)
if !different {
for k := range joinedSet {
if _, ok := missingDependencies[k]; !ok {
different = true
break
}
}
}
// Nothing to do
if !different {
continue
}
// Update the missing set
missingDependencies = joinedSet
// Update the event timer channel
if !outstandingEvent {
// We got new data so reset
outstandingEvent = true
eventTimer.Reset(tm.config.MaxTemplateEventRate)
}
case <-eventTimer.C:
if missingDependencies == nil {
continue
}
// Clear the outstanding event
outstandingEvent = false
// Build the missing set
missingSlice := make([]string, 0, len(missingDependencies))
for k := range missingDependencies {
missingSlice = append(missingSlice, k)
}
sort.Strings(missingSlice)
if l := len(missingSlice); l > missingDepEventLimit {
missingSlice[missingDepEventLimit] = fmt.Sprintf("and %d more", l-missingDepEventLimit)
missingSlice = missingSlice[:missingDepEventLimit+1]
}
missingStr := strings.Join(missingSlice, ", ")
tm.config.Events.EmitEvent(structs.NewTaskEvent(consulTemplateSourceName).SetDisplayMessage(fmt.Sprintf("Missing: %s", missingStr)))
2018-07-13 20:45:57 +00:00
}
}
}
// handleTemplateRerenders is used to handle template render events after they
// have all rendered. It takes action based on which set of templates re-render.
// The passed allRenderedTime is the time at which all templates have rendered.
// This is used to avoid signaling the task for any render event before hand.
func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time) {
// A lookup for the last time the template was handled
handledRenders := make(map[string]time.Time, len(tm.config.Templates))
for {
select {
case <-tm.shutdownCh:
return
case err, ok := <-tm.runner.ErrCh:
if !ok {
continue
}
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed: %v", err)))
2018-07-13 20:45:57 +00:00
case <-tm.runner.TemplateRenderedCh():
// A template has been rendered, figure out what to do
var handling []string
signals := make(map[string]struct{})
restart := false
var splay time.Duration
events := tm.runner.RenderEvents()
for id, event := range events {
// First time through
if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) {
handledRenders[id] = allRenderedTime
continue
}
// We have already handled this one
if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) {
continue
}
// Lookup the template and determine what to do
tmpls, ok := tm.lookup[id]
if !ok {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template runner returned unknown template id %q", id)))
2018-07-13 20:45:57 +00:00
return
}
// Read environment variables from templates
envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build())
2018-07-13 20:45:57 +00:00
if err != nil {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err)))
2018-07-13 20:45:57 +00:00
return
}
tm.config.EnvBuilder.SetTemplateEnv(envMap)
for _, tmpl := range tmpls {
switch tmpl.ChangeMode {
case structs.TemplateChangeModeSignal:
signals[tmpl.ChangeSignal] = struct{}{}
case structs.TemplateChangeModeRestart:
restart = true
case structs.TemplateChangeModeNoop:
continue
}
if tmpl.Splay > splay {
splay = tmpl.Splay
}
}
handling = append(handling, id)
}
if restart || len(signals) != 0 {
if splay != 0 {
ns := splay.Nanoseconds()
offset := rand.Int63n(ns)
t := time.Duration(offset)
select {
case <-time.After(t):
case <-tm.shutdownCh:
return
}
}
// Update handle time
for _, id := range handling {
handledRenders[id] = events[id].LastDidRender
}
if restart {
tm.config.Lifecycle.Restart(context.Background(),
structs.NewTaskEvent(structs.TaskRestartSignal).
SetDisplayMessage("Template with change_mode restart re-rendered"), false)
2018-07-13 20:45:57 +00:00
} else if len(signals) != 0 {
var mErr multierror.Error
for signal := range signals {
s := tm.signals[signal]
event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered")
if err := tm.config.Lifecycle.Signal(event, signal); err != nil {
2018-07-13 20:45:57 +00:00
multierror.Append(&mErr, err)
}
}
if err := mErr.ErrorOrNil(); err != nil {
flat := make([]os.Signal, 0, len(signals))
for signal := range signals {
flat = append(flat, tm.signals[signal])
}
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err)))
2018-07-13 20:45:57 +00:00
}
}
}
}
}
}
// allTemplatesNoop returns whether all the managed templates have change mode noop.
func (tm *TaskTemplateManager) allTemplatesNoop() bool {
for _, tmpl := range tm.config.Templates {
if tmpl.ChangeMode != structs.TemplateChangeModeNoop {
return false
}
}
return true
}
// templateRunner returns a consul-template runner for the given templates and a
// lookup by destination to the template. If no templates are in the config, a
// nil template runner and lookup is returned.
func templateRunner(config *TaskTemplateManagerConfig) (
*manager.Runner, map[string][]*structs.Template, error) {
if len(config.Templates) == 0 {
return nil, nil, nil
}
// Parse the templates
ctmplMapping, err := parseTemplateConfigs(config)
if err != nil {
return nil, nil, err
}
// Create the runner configuration.
runnerConfig, err := newRunnerConfig(config, ctmplMapping)
if err != nil {
return nil, nil, err
}
runner, err := manager.NewRunner(runnerConfig, false)
2018-07-13 20:45:57 +00:00
if err != nil {
return nil, nil, err
}
// Set Nomad's environment variables.
// consul-template falls back to the host process environment if a
// variable isn't explicitly set in the configuration, so we need
// to mask the environment out to ensure only the task env vars are
// available.
runner.Env = maskProcessEnv(config.EnvBuilder.Build().All())
2018-07-13 20:45:57 +00:00
// Build the lookup
idMap := runner.TemplateConfigMapping()
lookup := make(map[string][]*structs.Template, len(idMap))
for id, ctmpls := range idMap {
for _, ctmpl := range ctmpls {
templates := lookup[id]
templates = append(templates, ctmplMapping[ctmpl])
lookup[id] = templates
}
}
return runner, lookup, nil
}
// maskProcessEnv masks away any environment variable not found in task env.
// It manipulates the parameter directly and returns it without copying.
func maskProcessEnv(env map[string]string) map[string]string {
procEnvs := os.Environ()
for _, e := range procEnvs {
ekv := strings.SplitN(e, "=", 2)
if _, ok := env[ekv[0]]; !ok {
env[ekv[0]] = ""
}
}
return env
}
2018-07-13 20:45:57 +00:00
// parseTemplateConfigs converts the tasks templates in the config into
// consul-templates
func parseTemplateConfigs(config *TaskTemplateManagerConfig) (map[*ctconf.TemplateConfig]*structs.Template, error) {
sandboxEnabled := !config.ClientConfig.TemplateConfig.DisableSandbox
2018-07-13 20:45:57 +00:00
taskEnv := config.EnvBuilder.Build()
ctmpls := make(map[*ctconf.TemplateConfig]*structs.Template, len(config.Templates))
2018-07-13 20:45:57 +00:00
for _, tmpl := range config.Templates {
var src, dest string
var err error
2018-07-13 20:45:57 +00:00
if tmpl.SourcePath != "" {
src = taskEnv.ReplaceEnv(tmpl.SourcePath)
src, err = helper.GetPathInSandbox(config.TaskDir, src)
if err != nil && sandboxEnabled {
return nil, fmt.Errorf("template source path escapes alloc directory")
2018-07-13 20:45:57 +00:00
}
}
2018-07-13 20:45:57 +00:00
if tmpl.DestPath != "" {
dest = taskEnv.ReplaceEnv(tmpl.DestPath)
dest, err = helper.GetPathInSandbox(config.TaskDir, dest)
if err != nil && sandboxEnabled {
return nil, fmt.Errorf("template destination path escapes alloc directory")
}
2018-07-13 20:45:57 +00:00
}
ct := ctconf.DefaultTemplateConfig()
ct.Source = &src
ct.Destination = &dest
ct.Contents = &tmpl.EmbeddedTmpl
ct.LeftDelim = &tmpl.LeftDelim
ct.RightDelim = &tmpl.RightDelim
ct.FunctionDenylist = config.ClientConfig.TemplateConfig.FunctionDenylist
if sandboxEnabled {
ct.SandboxPath = &config.TaskDir
}
2018-07-13 20:45:57 +00:00
// Set the permissions
if tmpl.Perms != "" {
v, err := strconv.ParseUint(tmpl.Perms, 8, 12)
if err != nil {
return nil, fmt.Errorf("Failed to parse %q as octal: %v", tmpl.Perms, err)
}
m := os.FileMode(v)
ct.Perms = &m
}
ct.Finalize()
ctmpls[ct] = tmpl
2018-07-13 20:45:57 +00:00
}
return ctmpls, nil
}
// newRunnerConfig returns a consul-template runner configuration, setting the
// Vault and Consul configurations based on the clients configs.
func newRunnerConfig(config *TaskTemplateManagerConfig,
templateMapping map[*ctconf.TemplateConfig]*structs.Template) (*ctconf.Config, error) {
2018-07-13 20:45:57 +00:00
cc := config.ClientConfig
conf := ctconf.DefaultConfig()
// Gather the consul-template templates
flat := ctconf.TemplateConfigs(make([]*ctconf.TemplateConfig, 0, len(templateMapping)))
for ctmpl := range templateMapping {
local := ctmpl
flat = append(flat, local)
2018-07-13 20:45:57 +00:00
}
conf.Templates = &flat
// Force faster retries
if config.retryRate != 0 {
rate := config.retryRate
conf.Consul.Retry.Backoff = &rate
}
// Setup the Consul config
if cc.ConsulConfig != nil {
conf.Consul.Address = &cc.ConsulConfig.Addr
conf.Consul.Token = &cc.ConsulConfig.Token
conf.Consul.Namespace = &cc.ConsulConfig.Namespace
2018-07-13 20:45:57 +00:00
if cc.ConsulConfig.EnableSSL != nil && *cc.ConsulConfig.EnableSSL {
verify := cc.ConsulConfig.VerifySSL != nil && *cc.ConsulConfig.VerifySSL
conf.Consul.SSL = &ctconf.SSLConfig{
Enabled: helper.BoolToPtr(true),
Verify: &verify,
Cert: &cc.ConsulConfig.CertFile,
Key: &cc.ConsulConfig.KeyFile,
CaCert: &cc.ConsulConfig.CAFile,
}
}
if cc.ConsulConfig.Auth != "" {
parts := strings.SplitN(cc.ConsulConfig.Auth, ":", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("Failed to parse Consul Auth config")
}
conf.Consul.Auth = &ctconf.AuthConfig{
Enabled: helper.BoolToPtr(true),
Username: &parts[0],
Password: &parts[1],
}
}
}
// Setup the Vault config
// Always set these to ensure nothing is picked up from the environment
emptyStr := ""
conf.Vault.RenewToken = helper.BoolToPtr(false)
conf.Vault.Token = &emptyStr
if cc.VaultConfig != nil && cc.VaultConfig.IsEnabled() {
conf.Vault.Address = &cc.VaultConfig.Addr
conf.Vault.Token = &config.VaultToken
// Set the Vault Namespace. Passed in Task config has
// highest precedence.
if config.ClientConfig.VaultConfig.Namespace != "" {
conf.Vault.Namespace = &config.ClientConfig.VaultConfig.Namespace
}
if config.VaultNamespace != "" {
conf.Vault.Namespace = &config.VaultNamespace
}
2018-07-13 20:45:57 +00:00
if strings.HasPrefix(cc.VaultConfig.Addr, "https") || cc.VaultConfig.TLSCertFile != "" {
skipVerify := cc.VaultConfig.TLSSkipVerify != nil && *cc.VaultConfig.TLSSkipVerify
verify := !skipVerify
conf.Vault.SSL = &ctconf.SSLConfig{
Enabled: helper.BoolToPtr(true),
Verify: &verify,
Cert: &cc.VaultConfig.TLSCertFile,
Key: &cc.VaultConfig.TLSKeyFile,
CaCert: &cc.VaultConfig.TLSCaFile,
CaPath: &cc.VaultConfig.TLSCaPath,
ServerName: &cc.VaultConfig.TLSServerName,
}
} else {
conf.Vault.SSL = &ctconf.SSLConfig{
Enabled: helper.BoolToPtr(false),
Verify: helper.BoolToPtr(false),
Cert: &emptyStr,
Key: &emptyStr,
CaCert: &emptyStr,
CaPath: &emptyStr,
ServerName: &emptyStr,
}
}
}
conf.Finalize()
return conf, nil
}
// loadTemplateEnv loads task environment variables from all templates.
func loadTemplateEnv(tmpls []*structs.Template, taskDir string, taskEnv *taskenv.TaskEnv) (map[string]string, error) {
2018-07-13 20:45:57 +00:00
all := make(map[string]string, 50)
for _, t := range tmpls {
if !t.Envvars {
continue
}
dest := filepath.Join(taskDir, taskEnv.ReplaceEnv(t.DestPath))
f, err := os.Open(dest)
2018-07-13 20:45:57 +00:00
if err != nil {
return nil, fmt.Errorf("error opening env template: %v", err)
}
defer f.Close()
// Parse environment fil
vars, err := envparse.Parse(f)
if err != nil {
return nil, fmt.Errorf("error parsing env template %q: %v", dest, err)
2018-07-13 20:45:57 +00:00
}
for k, v := range vars {
all[k] = v
}
}
return all, nil
}