package exec

import (
	"context"
	"fmt"
	"io"
	"os"
	"sort"
	"sync"
	"time"

	"github.com/hashicorp/consul-template/child"
	ctconfig "github.com/hashicorp/consul-template/config"
	"github.com/hashicorp/consul-template/manager"
	"github.com/hashicorp/go-hclog"
	"golang.org/x/exp/slices"

	"github.com/hashicorp/vault/command/agent/config"
	"github.com/hashicorp/vault/command/agent/internal/ctmanager"
	"github.com/hashicorp/vault/helper/useragent"
	"github.com/hashicorp/vault/sdk/helper/pointerutil"
)

type childProcessState uint8

const (
	childProcessStateNotStarted childProcessState = iota
	childProcessStateRunning
	childProcessStateRestarting
	childProcessStateStopped
)
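
// The expected state transitions, as driven by Run and restartChildProcess:
// NotStarted -> Running once the first complete render starts the process,
// Running -> Restarting -> Running when a change in the rendered environment
// variables is detected and RestartOnSecretChanges is "always", and any
// state -> Stopped once the context is cancelled.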

type ServerConfig struct {
	Logger      hclog.Logger
	AgentConfig *config.Config

	Namespace string

	// LogLevel is needed to set the internal Consul Template Runner's log level
	// to match the log level of Vault Agent. The internal Runner creates its own
	// logger, which can't be set externally or copied from the Template Server.
	//
	// LogWriter is needed to initialize Consul Template's internal logger to use
	// the same io.Writer that Vault Agent itself is using.
	LogLevel  hclog.Level
	LogWriter io.Writer
}
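
// An illustrative construction (a minimal sketch; agentConfig and the
// namespace literal are assumed caller-side values, not part of this file):
//
//	cfg := &ServerConfig{
//		Logger:      logger.Named("exec.server"),
//		AgentConfig: agentConfig,
//		Namespace:   "my-namespace",
//		LogLevel:    hclog.Info,
//		LogWriter:   os.Stdout,
//	}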

type Server struct {
	// config holds the ServerConfig used to create the Server; it is passed
	// along to the other methods
	config *ServerConfig

	// runner is the consul-template runner
	runner *manager.Runner

	// numberOfTemplates is the count of templates determined by consul-template;
	// we keep the value to ensure all templates have been rendered before
	// starting the child process.
	// NOTE: each template may have more than one TemplateConfig, so this count
	// may not match the number of configured environment templates
	numberOfTemplates int

	logger hclog.Logger

	childProcess      *child.Child
	childProcessState childProcessState
	childProcessLock  sync.Mutex

	// exit channel of the child process
	childProcessExitCh chan int

	// lastRenderedEnvVars is the cached value of all environment variables
	// rendered by the templating engine; it is used for detecting changes
	lastRenderedEnvVars []string
}

// ProcessExitError is returned by Run when the child process exits on its own.
type ProcessExitError struct {
	ExitCode int
}

func (e *ProcessExitError) Error() string {
	return fmt.Sprintf("process exited with %d", e.ExitCode)
}
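
// Callers can distinguish a child process exiting on its own from other
// failures via errors.As; a minimal sketch, assuming the caller imports
// the standard "errors" package:
//
//	if err := server.Run(ctx, tokenCh); err != nil {
//		var exitErr *ProcessExitError
//		if errors.As(err, &exitErr) {
//			os.Exit(exitErr.ExitCode)
//		}
//	}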

// NewServer creates a new exec Server from the given ServerConfig.
func NewServer(cfg *ServerConfig) *Server {
	server := Server{
		logger:             cfg.Logger,
		config:             cfg,
		childProcessState:  childProcessStateNotStarted,
		childProcessExitCh: make(chan int),
	}

	return &server
}
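
// A typical lifecycle (an illustrative sketch; ctx, cfg, and the token
// plumbing are assumptions about the caller, not part of this file):
//
//	server := NewServer(cfg)
//	tokenCh := make(chan string, 1)
//	tokenCh <- initialToken // renewed tokens are sent on the same channel
//	err := server.Run(ctx, tokenCh)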

// Run starts the internal Consul Template runner, waits for all environment
// templates to render, and manages the child process, restarting it whenever
// the rendered environment variables change. It blocks until ctx is
// cancelled, the child process exits on its own, or an unrecoverable error
// occurs.
func (s *Server) Run(ctx context.Context, incomingVaultToken chan string) error {
	latestToken := new(string)
	s.logger.Info("starting exec server")
	defer func() {
		s.logger.Info("exec server stopped")
	}()

	if len(s.config.AgentConfig.EnvTemplates) == 0 || s.config.AgentConfig.Exec == nil {
		s.logger.Info("no env templates or exec config, exiting")
		<-ctx.Done()
		return nil
	}

	managerConfig := ctmanager.ManagerConfig{
		AgentConfig: s.config.AgentConfig,
		Namespace:   s.config.Namespace,
		LogLevel:    s.config.LogLevel,
		LogWriter:   s.config.LogWriter,
	}

	runnerConfig, err := ctmanager.NewConfig(managerConfig, s.config.AgentConfig.EnvTemplates)
	if err != nil {
		return fmt.Errorf("template server failed to generate runner config: %w", err)
	}

	// We leave the runner in "dry" mode, as there are no files to render;
	// we will extract the rendered contents of the environment variables
	// from the incoming render events instead
	s.runner, err = manager.NewRunner(runnerConfig, true)
	if err != nil {
		return fmt.Errorf("template server failed to create: %w", err)
	}

	// prevent the templates from being rendered to stdout in "dry" mode
	s.runner.SetOutStream(io.Discard)

	s.numberOfTemplates = len(s.runner.TemplateConfigMapping())

	// We receive multiple events every staticSecretRenderInterval
	// from <-s.runner.TemplateRenderedCh(), one for each secret. Only the last
	// event in a batch will contain the latest set of all secrets and the
	// corresponding environment variables. This timer will fire after 2 seconds
	// unless a new event comes in, which resets the timer back to 2 seconds.
	var debounceTimer *time.Timer

	// capture the errors related to restarting the child process
	restartChildProcessErrCh := make(chan error)

	for {
		select {
		case <-ctx.Done():
			s.runner.Stop()
			s.childProcessLock.Lock()
			if s.childProcess != nil {
				s.childProcess.Stop()
			}
			s.childProcessState = childProcessStateStopped
			s.childProcessLock.Unlock()
			return nil

		case token := <-incomingVaultToken:
			if token != *latestToken {
				s.logger.Info("exec server received new token")

				s.runner.Stop()
				*latestToken = token
				newTokenConfig := ctconfig.Config{
					Vault: &ctconfig.VaultConfig{
						Token:           latestToken,
						ClientUserAgent: pointerutil.StringPtr(useragent.AgentTemplatingString()),
					},
				}

				// got a new auth token, merge it in with the existing config
				runnerConfig = runnerConfig.Merge(&newTokenConfig)
				s.runner, err = manager.NewRunner(runnerConfig, true)
				if err != nil {
					s.logger.Error("template server failed with new Vault token", "error", err)
					continue
				}

				// prevent the templates from being rendered to stdout in "dry" mode
				s.runner.SetOutStream(io.Discard)

				go s.runner.Start()
			}

		case err := <-s.runner.ErrCh:
			s.logger.Error("template server error", "error", err.Error())
			s.runner.StopImmediately()

			// Return after stopping the runner if exit on retry failure was specified
			if s.config.AgentConfig.TemplateConfig != nil && s.config.AgentConfig.TemplateConfig.ExitOnRetryFailure {
				return fmt.Errorf("template server: %w", err)
			}

			s.runner, err = manager.NewRunner(runnerConfig, true)
			if err != nil {
				return fmt.Errorf("template server failed to create: %w", err)
			}
			go s.runner.Start()

		case <-s.runner.TemplateRenderedCh():
			// A template has been rendered, figure out what to do
			s.logger.Trace("template rendered")
			events := s.runner.RenderEvents()

			// This checks if we've finished rendering the initial set of templates;
			// for every consecutive re-render, len(events) should equal s.numberOfTemplates
			if len(events) < s.numberOfTemplates {
				// Not all templates have been rendered yet
				continue
			}

			// assume the renders are finished, until we find otherwise
			doneRendering := true
			var renderedEnvVars []string
			for _, event := range events {
				// This template hasn't been rendered
				if event.LastWouldRender.IsZero() {
					doneRendering = false
					break
				}

				for _, tcfg := range event.TemplateConfigs {
					envVar := fmt.Sprintf("%s=%s", *tcfg.MapToEnvironmentVariable, event.Contents)
					renderedEnvVars = append(renderedEnvVars, envVar)
				}
			}
			if !doneRendering {
				continue
			}

			// sort the environment variables for a deterministic output and easy comparison
			sort.Strings(renderedEnvVars)

			s.logger.Trace("done rendering templates")

			// don't restart the process unless a change is detected
			if slices.Equal(s.lastRenderedEnvVars, renderedEnvVars) {
				continue
			}

			s.lastRenderedEnvVars = renderedEnvVars

			s.logger.Debug("detected a change in the environment variables: restarting the child process")

			// if a timer exists, stop it
			if debounceTimer != nil {
				debounceTimer.Stop()
			}
			debounceTimer = time.AfterFunc(2*time.Second, func() {
				if err := s.restartChildProcess(renderedEnvVars); err != nil {
					restartChildProcessErrCh <- fmt.Errorf("unable to restart the child process: %w", err)
				}
			})

		case err := <-restartChildProcessErrCh:
			// catch the error from restarting
			return err

		case exitCode := <-s.childProcessExitCh:
			// process exited on its own
			return &ProcessExitError{ExitCode: exitCode}
		}
	}
}

func (s *Server) restartChildProcess(newEnvVars []string) error {
	s.childProcessLock.Lock()
	defer s.childProcessLock.Unlock()

	switch s.config.AgentConfig.Exec.RestartOnSecretChanges {
	case "always":
		if s.childProcessState == childProcessStateRunning {
			// process is running, need to kill it first
			s.logger.Info("stopping process", "process_id", s.childProcess.Pid())
			s.childProcessState = childProcessStateRestarting
			s.childProcess.Stop()
		}
	case "never":
		if s.childProcessState == childProcessStateRunning {
			s.logger.Info("detected update, but not restarting process", "process_id", s.childProcess.Pid())
			return nil
		}
	default:
		return fmt.Errorf("invalid value for restart-on-secret-changes: %q", s.config.AgentConfig.Exec.RestartOnSecretChanges)
	}

	args, subshell, err := child.CommandPrep(s.config.AgentConfig.Exec.Command)
	if err != nil {
		return fmt.Errorf("unable to parse command: %w", err)
	}

	childInput := &child.NewInput{
		Stdin:        os.Stdin,
		Stdout:       os.Stdout,
		Stderr:       os.Stderr,
		Command:      args[0],
		Args:         args[1:],
		Timeout:      0, // let it run forever
		Env:          append(os.Environ(), newEnvVars...),
		ReloadSignal: nil, // can't reload w/ new env vars
		KillSignal:   s.config.AgentConfig.Exec.RestartStopSignal,
		KillTimeout:  30 * time.Second,
		Splay:        0,
		Setpgid:      subshell,
		Logger:       s.logger.StandardLogger(nil),
	}

	proc, err := child.New(childInput)
	if err != nil {
		return err
	}
	s.childProcess = proc

	if err := s.childProcess.Start(); err != nil {
		return fmt.Errorf("error starting the child process: %w", err)
	}

	s.childProcessState = childProcessStateRunning

	// Listen for the child process to exit and bubble it up to the main loop.
	//
	// NOTE: this must be invoked after child.Start() to avoid a potential
	// race condition with ExitCh not being initialized.
	go func() {
		// ignore ExitCh channel closures caused by our restarts
		if exitCode, ok := <-proc.ExitCh(); ok {
			s.childProcessExitCh <- exitCode
		}
	}()

	return nil
}