open-vault/command/agent/exec/exec.go

package exec

import (
	"context"
	"fmt"
	"io"
	"os"
	"sort"
	"sync"
	"time"

	"github.com/hashicorp/consul-template/child"
	ctconfig "github.com/hashicorp/consul-template/config"
	"github.com/hashicorp/consul-template/manager"
	"github.com/hashicorp/go-hclog"
	"golang.org/x/exp/slices"

	"github.com/hashicorp/vault/command/agent/config"
	"github.com/hashicorp/vault/command/agent/internal/ctmanager"
	"github.com/hashicorp/vault/helper/useragent"
	"github.com/hashicorp/vault/sdk/helper/pointerutil"
)

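// childProcessState tracks where the child process is in its lifecycle:
// not yet started, running, being restarted, or stopped.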
type childProcessState uint8

const (
	childProcessStateNotStarted childProcessState = iota
	childProcessStateRunning
	childProcessStateRestarting
	childProcessStateStopped
)

type ServerConfig struct {
	Logger      hclog.Logger
	AgentConfig *config.Config

	Namespace string

	// LogLevel is needed to set the internal Consul Template Runner's log
	// level to match the log level of Vault Agent. The internal Runner
	// creates its own logger and can't be set externally or copied from
	// the Template Server.
	//
	// LogWriter is needed to initialize Consul Template's internal logger
	// to use the same io.Writer that Vault Agent itself is using.
	LogLevel  hclog.Level
	LogWriter io.Writer
}

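// Server manages the child process, restarting it as the environment
// variables rendered by the consul-template runner change.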
type Server struct {
	// config holds the ServerConfig used to create it. It's passed along
	// in other methods.
	config *ServerConfig

	// runner is the consul-template runner
	runner *manager.Runner

	// numberOfTemplates is the count of templates determined by
	// consul-template; we keep the value to ensure all templates have been
	// rendered before starting the child process.
	// NOTE: each template may have more than one TemplateConfig, so the
	// numbers may not match up.
	numberOfTemplates int

	logger hclog.Logger

	childProcess      *child.Child
	childProcessState childProcessState
	childProcessLock  sync.Mutex

	// exit channel of the child process
	childProcessExitCh chan int

	// lastRenderedEnvVars is the cached value of all environment variables
	// rendered by the templating engine; it is used for detecting changes
	lastRenderedEnvVars []string
}

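// ProcessExitError is returned by Run when the child process exits on its
// own; it carries the child's exit code.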
type ProcessExitError struct {
	ExitCode int
}

func (e *ProcessExitError) Error() string {
	return fmt.Sprintf("process exited with %d", e.ExitCode)
}

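// NewServer creates a new exec server from the given configuration. The
// child process is not started until Run has received a Vault token and
// all environment variable templates have rendered.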
func NewServer(cfg *ServerConfig) *Server {
	server := Server{
		logger:             cfg.Logger,
		config:             cfg,
		childProcessState:  childProcessStateNotStarted,
		childProcessExitCh: make(chan int),
	}

	return &server
}

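// Run is the main loop of the exec server: it waits for Vault tokens,
// renders the environment variable templates, and manages the child
// process. It blocks until ctx is canceled, an unrecoverable error occurs,
// or the child process exits on its own (returned as a *ProcessExitError).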
func (s *Server) Run(ctx context.Context, incomingVaultToken chan string) error {
	latestToken := new(string)

	s.logger.Info("starting exec server")
	defer func() {
		s.logger.Info("exec server stopped")
	}()

	if len(s.config.AgentConfig.EnvTemplates) == 0 || s.config.AgentConfig.Exec == nil {
		s.logger.Info("no env templates or exec config, exiting")
		<-ctx.Done()
		return nil
	}

	managerConfig := ctmanager.ManagerConfig{
		AgentConfig: s.config.AgentConfig,
		Namespace:   s.config.Namespace,
		LogLevel:    s.config.LogLevel,
		LogWriter:   s.config.LogWriter,
	}

	runnerConfig, err := ctmanager.NewConfig(managerConfig, s.config.AgentConfig.EnvTemplates)
	if err != nil {
		return fmt.Errorf("template server failed to generate runner config: %w", err)
	}

	// We leave the runner in "dry" mode, as there are no files to render;
	// instead, we read the rendered contents of the environment variable
	// templates from the incoming render events.
	s.runner, err = manager.NewRunner(runnerConfig, true)
	if err != nil {
		return fmt.Errorf("template server failed to create: %w", err)
	}

	// prevent the templates from being rendered to stdout in "dry" mode
	s.runner.SetOutStream(io.Discard)

	s.numberOfTemplates = len(s.runner.TemplateConfigMapping())

	// We receive multiple events every staticSecretRenderInterval from
	// <-s.runner.TemplateRenderedCh(), one for each secret. Only the last
	// event in a batch will contain the latest set of all secrets and the
	// corresponding environment variables. This timer will fire after 2
	// seconds unless another event comes in, which resets it back to 2
	// seconds.
	var debounceTimer *time.Timer

	// capture the errors related to restarting the child process
	restartChildProcessErrCh := make(chan error)
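
	// The main event loop: each iteration reacts to context cancellation,
	// a new Vault token, a runner error, a template render event, a child
	// restart failure, or the child process exiting on its own.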
	for {
		select {
		case <-ctx.Done():
			s.runner.Stop()

			s.childProcessLock.Lock()
			if s.childProcess != nil {
				s.childProcess.Stop()
			}
			s.childProcessState = childProcessStateStopped
			s.childProcessLock.Unlock()

			return nil

		case token := <-incomingVaultToken:
			if token != *latestToken {
				s.logger.Info("exec server received new token")

				s.runner.Stop()
				*latestToken = token

				newTokenConfig := ctconfig.Config{
					Vault: &ctconfig.VaultConfig{
						Token:           latestToken,
						ClientUserAgent: pointerutil.StringPtr(useragent.AgentTemplatingString()),
					},
				}

				// got a new auth token, merge it in with the existing config
				runnerConfig = runnerConfig.Merge(&newTokenConfig)

				s.runner, err = manager.NewRunner(runnerConfig, true)
				if err != nil {
					s.logger.Error("template server failed with new Vault token", "error", err)
					continue
				}

				// prevent the templates from being rendered to stdout in "dry" mode
				s.runner.SetOutStream(io.Discard)

				go s.runner.Start()
			}

		case err := <-s.runner.ErrCh:
			s.logger.Error("template server error", "error", err.Error())
			s.runner.StopImmediately()

			// Return after stopping the runner if exit on retry failure was specified
			if s.config.AgentConfig.TemplateConfig != nil && s.config.AgentConfig.TemplateConfig.ExitOnRetryFailure {
				return fmt.Errorf("template server: %w", err)
			}

			s.runner, err = manager.NewRunner(runnerConfig, true)
			if err != nil {
				return fmt.Errorf("template server failed to create: %w", err)
			}
			go s.runner.Start()

		case <-s.runner.TemplateRenderedCh():
			// A template has been rendered, figure out what to do
			s.logger.Trace("template rendered")
			events := s.runner.RenderEvents()

			// This checks whether we've finished rendering the initial set
			// of templates; on every subsequent re-render, len(events)
			// should equal s.numberOfTemplates.
			if len(events) < s.numberOfTemplates {
				// Not all templates have been rendered yet
				continue
			}

			// assume the renders are finished, until we find otherwise
			doneRendering := true

			var renderedEnvVars []string
			for _, event := range events {
				// This template hasn't been rendered
				if event.LastWouldRender.IsZero() {
					doneRendering = false
					break
				}

				for _, tcfg := range event.TemplateConfigs {
					envVar := fmt.Sprintf("%s=%s", *tcfg.MapToEnvironmentVariable, event.Contents)
					renderedEnvVars = append(renderedEnvVars, envVar)
				}
			}
			if !doneRendering {
				continue
			}

			// sort the environment variables for deterministic output and easy comparison
			sort.Strings(renderedEnvVars)

			s.logger.Trace("done rendering templates")

			// don't restart the process unless a change is detected
			if slices.Equal(s.lastRenderedEnvVars, renderedEnvVars) {
				continue
			}

			s.lastRenderedEnvVars = renderedEnvVars

			s.logger.Debug("detected a change in the environment variables: restarting the child process")

			// if a timer already exists, stop it
			if debounceTimer != nil {
				debounceTimer.Stop()
			}
			debounceTimer = time.AfterFunc(2*time.Second, func() {
				if err := s.restartChildProcess(renderedEnvVars); err != nil {
					restartChildProcessErrCh <- fmt.Errorf("unable to restart the child process: %w", err)
				}
			})

		case err := <-restartChildProcessErrCh:
			// catch the error from restarting
			return err

		case exitCode := <-s.childProcessExitCh:
			// process exited on its own
			return &ProcessExitError{ExitCode: exitCode}
		}
	}
}

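// restartChildProcess starts the child process with newEnvVars appended to
// the parent's environment, first stopping any running process when
// RestartOnSecretChanges is "always"; with "never", a running process is
// left untouched.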
func (s *Server) restartChildProcess(newEnvVars []string) error {
	s.childProcessLock.Lock()
	defer s.childProcessLock.Unlock()

	switch s.config.AgentConfig.Exec.RestartOnSecretChanges {
	case "always":
		if s.childProcessState == childProcessStateRunning {
			// process is running, need to kill it first
			s.logger.Info("stopping process", "process_id", s.childProcess.Pid())
			s.childProcessState = childProcessStateRestarting
			s.childProcess.Stop()
		}

	case "never":
		if s.childProcessState == childProcessStateRunning {
			s.logger.Info("detected update, but not restarting process", "process_id", s.childProcess.Pid())
			return nil
		}

	default:
		return fmt.Errorf("invalid value for restart-on-secret-changes: %q", s.config.AgentConfig.Exec.RestartOnSecretChanges)
	}
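
	// Prepare the configured command for execution. The subshell flag
	// reported by child.CommandPrep decides whether the child should run
	// in its own process group (see Setpgid below).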
	args, subshell, err := child.CommandPrep(s.config.AgentConfig.Exec.Command)
	if err != nil {
		return fmt.Errorf("unable to parse command: %w", err)
	}

	childInput := &child.NewInput{
		Stdin:        os.Stdin,
		Stdout:       os.Stdout,
		Stderr:       os.Stderr,
		Command:      args[0],
		Args:         args[1:],
		Timeout:      0, // let it run forever
		Env:          append(os.Environ(), newEnvVars...),
		ReloadSignal: nil, // can't reload w/ new env vars
		KillSignal:   s.config.AgentConfig.Exec.RestartStopSignal,
		KillTimeout:  30 * time.Second,
		Splay:        0,
		Setpgid:      subshell,
		Logger:       s.logger.StandardLogger(nil),
	}

	proc, err := child.New(childInput)
	if err != nil {
		return err
	}
	s.childProcess = proc

	if err := s.childProcess.Start(); err != nil {
		return fmt.Errorf("error starting the child process: %w", err)
	}
	s.childProcessState = childProcessStateRunning

	// Listen for the child process to exit and bubble it up to the main loop.
	//
	// NOTE: this must be invoked after child.Start() to avoid a potential
	// race condition with ExitCh not being initialized.
	go func() {
		exitCode, ok := <-proc.ExitCh()
		// ignore ExitCh channel closures caused by our restarts
		if ok {
			s.childProcessExitCh <- exitCode
		}
	}()

	return nil
}