connect: write envoy bootstrap debugging info

When Consul Connect just works, it's wonderful. When it doesn't work it
can be exceeding difficult to debug: operators have to check task
events, Nomad logs, Consul logs, Consul APIs, and even then critical
information is missing.

Using Consul to generate a bootstrap config for Envoy is notoriously
difficult. Nomad doesn't even log stderr, so operators are left trying
to piece together what went wrong.

This patch attempts to provide *maximal* context which unfortunately
includes secrets. **Secrets are always restricted to the secrets/
directory.** This makes debugging a little harder, but allows operators
to know exactly what operation Nomad was trying to perform.

What's added:

- stderr is sent to alloc/logs/envoy_bootstrap.stderr.0
- the CLI is written to secrets/.envoy_bootstrap.cmd
- the environment is written to secrets/.envoy_bootstrap.env as JSON

Accessing this information is unfortunately awkward:
```
nomad alloc exec -task connect-proxy-count-countdash b36a cat secrets/.envoy_bootstrap.env
nomad alloc exec -task connect-proxy-count-countdash b36a cat secrets/.envoy_bootstrap.cmd
nomad alloc fs b36a alloc/logs/envoy_bootstrap.stderr.0
```

The above assumes an alloc id that starts with `b36a` and a Connect
sidecar proxy for a service named `count-countdash`.

If the alloc is unable to start successfully, the debugging files are
only accessible from the host filesystem.
This commit is contained in:
Michael Schurter 2022-02-01 15:55:36 -08:00
parent 15205b5408
commit 27b8112123
2 changed files with 56 additions and 35 deletions

3
.changelog/11975.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
consul/connect: write Envoy bootstrapping information to disk for debugging
```

View File

@ -1,15 +1,17 @@
package taskrunner package taskrunner
import ( import (
"bytes"
"context" "context"
"encoding/json"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"net" "net"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
@ -261,6 +263,11 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
// it to the secrets directory like Vault tokens. // it to the secrets directory like Vault tokens.
bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json") bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json")
// Write everything related to the command to enable debugging
bootstrapStderrPath := filepath.Join(req.TaskDir.LogDir, "envoy_bootstrap.stderr.0")
bootstrapEnvPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.env")
bootstrapCmdPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.cmd")
siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir) siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir)
if err != nil { if err != nil {
h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name) h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name)
@ -269,16 +276,46 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "") h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "")
bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, envoyReadyBind, siToken, bootstrapFilePath) bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, envoyReadyBind, siToken, bootstrapFilePath)
// Create command line arguments
bootstrapArgs := bootstrap.args() bootstrapArgs := bootstrap.args()
// Write args to file for debugging
argsFile, err := os.Create(bootstrapCmdPath)
if err != nil {
return errors.Wrap(err, "failed to write bootstrap command line")
}
defer argsFile.Close()
if _, err := io.WriteString(argsFile, strings.Join(bootstrapArgs, " ")+"\n"); err != nil {
return errors.Wrap(err, "failed to encode bootstrap command line")
}
// Create environment
bootstrapEnv := bootstrap.env(os.Environ()) bootstrapEnv := bootstrap.env(os.Environ())
// Write env to file for debugging
envFile, err := os.Create(bootstrapEnvPath)
if err != nil {
return errors.Wrap(err, "failed to write bootstrap environment")
}
defer envFile.Close()
envEnc := json.NewEncoder(envFile)
envEnc.SetIndent("", " ")
if err := envEnc.Encode(bootstrapEnv); err != nil {
return errors.Wrap(err, "failed to encode bootstrap environment")
}
// keep track of latest error returned from exec-ing consul envoy bootstrap // keep track of latest error returned from exec-ing consul envoy bootstrap
var cmdErr error var cmdErr error
// Since Consul services are registered asynchronously with this task // Since Consul services are registered asynchronously with this task
// hook running, retry until timeout or success. // hook running, retry until timeout or success.
if backoffErr := decay.Backoff(func() (bool, error) { backoffOpts := decay.BackoffOptions{
MaxSleepTime: h.envoyBootstrapWaitTime,
InitialGapSize: h.envoyBoostrapInitialGap,
MaxJitterSize: h.envoyBootstrapMaxJitter,
}
backoffErr := decay.Backoff(func() (bool, error) {
// If hook is killed, just stop. // If hook is killed, just stop.
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -291,22 +328,24 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
cmd.Env = bootstrapEnv cmd.Env = bootstrapEnv
// Redirect stdout to secrets/envoy_bootstrap.json. // Redirect stdout to secrets/envoy_bootstrap.json.
fd, fileErr := os.Create(bootstrapFilePath) stdout, fileErr := os.Create(bootstrapFilePath)
if fileErr != nil { if fileErr != nil {
return false, fmt.Errorf("failed to create secrets/envoy_bootstrap.json for envoy: %w", fileErr) return false, fmt.Errorf("failed to create secrets/envoy_bootstrap.json for envoy: %w", fileErr)
} }
cmd.Stdout = fd defer stdout.Close()
cmd.Stdout = stdout
// Redirect stderr into a buffer for later reading. // Redirect stderr into another file for later debugging.
buf := bytes.NewBuffer(nil) stderr, fileErr := os.OpenFile(bootstrapStderrPath, os.O_RDWR|os.O_CREATE, 0644)
cmd.Stderr = buf if fileErr != nil {
return false, fmt.Errorf("failed to create alloc/logs/envoy_bootstrap.stderr.0 for envoy: %w", fileErr)
}
defer stderr.Close()
cmd.Stderr = stderr
// Generate bootstrap // Generate bootstrap
cmdErr = cmd.Run() cmdErr = cmd.Run()
// Close bootstrap.json regardless of any command errors.
_ = fd.Close()
// Command succeeded, exit. // Command succeeded, exit.
if cmdErr == nil { if cmdErr == nil {
// Bootstrap written. Mark as done and move on. // Bootstrap written. Mark as done and move on.
@ -324,11 +363,9 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
_ = os.Remove(bootstrapFilePath) _ = os.Remove(bootstrapFilePath)
return true, cmdErr return true, cmdErr
}, decay.BackoffOptions{ }, backoffOpts)
MaxSleepTime: h.envoyBootstrapWaitTime,
InitialGapSize: h.envoyBoostrapInitialGap, if backoffErr != nil {
MaxJitterSize: h.envoyBootstrapMaxJitter,
}); backoffErr != nil {
// Wrap the last error from Consul and set that as our status. // Wrap the last error from Consul and set that as our status.
_, recoverable := cmdErr.(*exec.ExitError) _, recoverable := cmdErr.(*exec.ExitError)
return structs.NewRecoverableError( return structs.NewRecoverableError(
@ -394,25 +431,6 @@ func (h *envoyBootstrapHook) writeConfig(filename, config string) error {
return nil return nil
} }
func (h *envoyBootstrapHook) execute(cmd *exec.Cmd) (string, error) {
var (
stdout bytes.Buffer
stderr bytes.Buffer
)
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
_, recoverable := err.(*exec.ExitError)
// ExitErrors are recoverable since they indicate the
// command was runnable but exited with a unsuccessful
// error code.
return stderr.String(), structs.NewRecoverableError(err, recoverable)
}
return stdout.String(), nil
}
// grpcAddress determines the Consul gRPC endpoint address to use. // grpcAddress determines the Consul gRPC endpoint address to use.
// //
// In host networking this will default to 127.0.0.1:8502. // In host networking this will default to 127.0.0.1:8502.