docs: how to troubleshoot consul connect envoy (#15908)
* largely a doc-ification of this commit message: d47678074bf8ae9ff2da3c91d0729bf03aee8446. This doesn't spell out all the possible failure modes, but should be a good starting point for folks. * connect: add doc link to envoy bootstrap error * add Unwrap() to RecoverableError, mainly for easier testing
This commit is contained in:
parent
dec41f7f01
commit
335f0a5371
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
docs: link to an envoy troubleshooting doc when envoy bootstrap fails
|
||||
```
|
|
@ -43,6 +43,10 @@ const (
|
|||
envoyBootstrapMaxJitter = 500 * time.Millisecond
|
||||
)
|
||||
|
||||
var (
|
||||
// errEnvoyBootstrapError is the sentinel error wrapped (via %w) into the
// error that Prestart returns when creating the envoy bootstrap
// configuration fails, so that callers and tests can match it with
// errors.Is instead of comparing message strings.
errEnvoyBootstrapError = errors.New("error creating bootstrap configuration for Connect proxy sidecar")
|
||||
)
|
||||
|
||||
type consulTransportConfig struct {
|
||||
HTTPAddr string // required
|
||||
Auth string // optional, env CONSUL_HTTP_AUTH
|
||||
|
@ -373,7 +377,10 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
|
|||
// Wrap the last error from Consul and set that as our status.
|
||||
_, recoverable := cmdErr.(*exec.ExitError)
|
||||
return structs.NewRecoverableError(
|
||||
fmt.Errorf("error creating bootstrap configuration for Connect proxy sidecar: %v", cmdErr),
|
||||
fmt.Errorf("%w: %v; see: <https://www.nomadproject.io/s/envoy-bootstrap-error>",
|
||||
errEnvoyBootstrapError,
|
||||
cmdErr,
|
||||
),
|
||||
recoverable,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -672,7 +672,7 @@ func TestTaskRunner_EnvoyBootstrapHook_RecoverableError(t *testing.T) {
|
|||
|
||||
// Run the hook
|
||||
err := h.Prestart(context.Background(), req, resp)
|
||||
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
|
||||
require.ErrorIs(t, err, errEnvoyBootstrapError)
|
||||
require.True(t, structs.IsRecoverable(err))
|
||||
|
||||
// Assert it is not Done
|
||||
|
@ -760,7 +760,7 @@ func TestTaskRunner_EnvoyBootstrapHook_retryTimeout(t *testing.T) {
|
|||
|
||||
// Run the hook and get the error
|
||||
err := h.Prestart(context.Background(), req, &resp)
|
||||
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
|
||||
require.ErrorIs(t, err, errEnvoyBootstrapError)
|
||||
|
||||
// Current time should be at least start time + total wait time
|
||||
minimum := begin.Add(h.envoyBootstrapWaitTime)
|
||||
|
|
|
@ -11906,6 +11906,7 @@ type KeyringRequest struct {
|
|||
type RecoverableError struct {
|
||||
// Err is the message text of the wrapped error; NewRecoverableError sets
// it from e.Error().
Err string
|
||||
// Recoverable marks whether the error is recoverable; IsUnrecoverable
// returns its negation.
Recoverable bool
|
||||
// wrapped holds the original error value passed to NewRecoverableError
// and is what Unwrap returns. It is unexported, so it is nil on values
// that were not built through NewRecoverableError.
wrapped error
|
||||
}
|
||||
|
||||
// NewRecoverableError is used to wrap an error and mark it as recoverable or
|
||||
|
@ -11918,6 +11919,7 @@ func NewRecoverableError(e error, recoverable bool) error {
|
|||
return &RecoverableError{
|
||||
Err: e.Error(),
|
||||
Recoverable: recoverable,
|
||||
wrapped: e,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11940,6 +11942,10 @@ func (r *RecoverableError) IsUnrecoverable() bool {
|
|||
return !r.Recoverable
|
||||
}
|
||||
|
||||
// Unwrap returns the original error stored by NewRecoverableError, which
// lets errors.Is and errors.As see through a RecoverableError to the error
// it wraps. It returns nil when no wrapped error was recorded.
func (r *RecoverableError) Unwrap() error {
|
||||
return r.wrapped
|
||||
}
|
||||
|
||||
// Recoverable is an interface for errors to implement to indicate whether or
|
||||
// not they are fatal or recoverable.
|
||||
type Recoverable interface {
|
||||
|
|
|
@ -371,6 +371,32 @@ dashes (`-`) are converted to underscores (`_`) in environment variables so
|
|||
- Prior to Consul 1.9, the Envoy sidecar proxy will drop and stop accepting
|
||||
connections while the Nomad agent is restarting.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If the sidecar service is not running correctly, you can investigate
|
||||
potential `envoy` failures in the following ways:
|
||||
|
||||
* Task logs in the associated `connect-*` task
|
||||
* Task secrets (may contain sensitive information):
|
||||
* envoy CLI command: `secrets/.envoy_bootstrap.cmd`
|
||||
* environment variables: `secrets/.envoy_bootstrap.env`
|
||||
* An extra allocation log file: `alloc/logs/envoy_bootstrap.stderr.0`
|
||||
|
||||
For example, with an allocation ID starting with `b36a`:
|
||||
|
||||
```shell-session
|
||||
nomad alloc status -short b36a # to get the connect-* task name
|
||||
nomad alloc logs -task connect-proxy-count-api -stderr b36a
|
||||
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.cmd
|
||||
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.env
|
||||
nomad alloc fs b36a alloc/logs/envoy_bootstrap.stderr.0
|
||||
```
|
||||
|
||||
Note: If the alloc is unable to start successfully, debugging files may
|
||||
only be accessible from the host filesystem. However, the sidecar task secrets
|
||||
directory may not be available on systems where it is mounted in a temporary
|
||||
filesystem.
|
||||
|
||||
[count-dashboard]: /img/count-dashboard.png
|
||||
[consul_acl]: https://github.com/hashicorp/consul/issues/7414
|
||||
[gh-9907]: https://github.com/hashicorp/nomad/issues/9907
|
||||
|
|
Loading…
Reference in New Issue