docs: how to troubleshoot consul connect envoy (#15908)
* largely a doc-ification of this commit message: d47678074bf8ae9ff2da3c91d0729bf03aee8446. This doesn't spell out all the possible failure modes, but should be a good starting point for folks.
* connect: add doc link to envoy bootstrap error
* add Unwrap() to RecoverableError, mainly for easier testing
This commit is contained in:
parent
dec41f7f01
commit
335f0a5371
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:improvement
|
||||||
|
docs: link to an envoy troubleshooting doc when envoy bootstrap fails
|
||||||
|
```
|
|
@ -43,6 +43,10 @@ const (
|
||||||
envoyBootstrapMaxJitter = 500 * time.Millisecond
|
envoyBootstrapMaxJitter = 500 * time.Millisecond
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// errEnvoyBootstrapError is the sentinel error wrapped (via %w) into the
// error the envoy bootstrap hook's Prestart returns when the bootstrap
// command fails, so callers and tests can match it with errors.Is instead
// of comparing message strings.
errEnvoyBootstrapError = errors.New("error creating bootstrap configuration for Connect proxy sidecar")
|
||||||
|
)
|
||||||
|
|
||||||
type consulTransportConfig struct {
|
type consulTransportConfig struct {
|
||||||
HTTPAddr string // required
|
HTTPAddr string // required
|
||||||
Auth string // optional, env CONSUL_HTTP_AUTH
|
Auth string // optional, env CONSUL_HTTP_AUTH
|
||||||
|
@ -373,7 +377,10 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
|
||||||
// Wrap the last error from Consul and set that as our status.
|
// Wrap the last error from Consul and set that as our status.
|
||||||
_, recoverable := cmdErr.(*exec.ExitError)
|
_, recoverable := cmdErr.(*exec.ExitError)
|
||||||
return structs.NewRecoverableError(
|
return structs.NewRecoverableError(
|
||||||
fmt.Errorf("error creating bootstrap configuration for Connect proxy sidecar: %v", cmdErr),
|
fmt.Errorf("%w: %v; see: <https://www.nomadproject.io/s/envoy-bootstrap-error>",
|
||||||
|
errEnvoyBootstrapError,
|
||||||
|
cmdErr,
|
||||||
|
),
|
||||||
recoverable,
|
recoverable,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -672,7 +672,7 @@ func TestTaskRunner_EnvoyBootstrapHook_RecoverableError(t *testing.T) {
|
||||||
|
|
||||||
// Run the hook
|
// Run the hook
|
||||||
err := h.Prestart(context.Background(), req, resp)
|
err := h.Prestart(context.Background(), req, resp)
|
||||||
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
|
require.ErrorIs(t, err, errEnvoyBootstrapError)
|
||||||
require.True(t, structs.IsRecoverable(err))
|
require.True(t, structs.IsRecoverable(err))
|
||||||
|
|
||||||
// Assert it is not Done
|
// Assert it is not Done
|
||||||
|
@ -760,7 +760,7 @@ func TestTaskRunner_EnvoyBootstrapHook_retryTimeout(t *testing.T) {
|
||||||
|
|
||||||
// Run the hook and get the error
|
// Run the hook and get the error
|
||||||
err := h.Prestart(context.Background(), req, &resp)
|
err := h.Prestart(context.Background(), req, &resp)
|
||||||
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
|
require.ErrorIs(t, err, errEnvoyBootstrapError)
|
||||||
|
|
||||||
// Current time should be at least start time + total wait time
|
// Current time should be at least start time + total wait time
|
||||||
minimum := begin.Add(h.envoyBootstrapWaitTime)
|
minimum := begin.Add(h.envoyBootstrapWaitTime)
|
||||||
|
|
|
@ -11906,6 +11906,7 @@ type KeyringRequest struct {
|
||||||
type RecoverableError struct {
|
type RecoverableError struct {
|
||||||
Err string
|
Err string
|
||||||
Recoverable bool
|
Recoverable bool
|
||||||
|
wrapped error
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRecoverableError is used to wrap an error and mark it as recoverable or
|
// NewRecoverableError is used to wrap an error and mark it as recoverable or
|
||||||
|
@ -11918,6 +11919,7 @@ func NewRecoverableError(e error, recoverable bool) error {
|
||||||
return &RecoverableError{
|
return &RecoverableError{
|
||||||
Err: e.Error(),
|
Err: e.Error(),
|
||||||
Recoverable: recoverable,
|
Recoverable: recoverable,
|
||||||
|
wrapped: e,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11940,6 +11942,10 @@ func (r *RecoverableError) IsUnrecoverable() bool {
|
||||||
return !r.Recoverable
|
return !r.Recoverable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unwrap returns the error stored in the wrapped field, which
// NewRecoverableError sets to the error it was given. Exposing it lets
// errors.Is and errors.As see through a RecoverableError to the original
// error.
func (r *RecoverableError) Unwrap() error {
|
||||||
|
return r.wrapped
|
||||||
|
}
|
||||||
|
|
||||||
// Recoverable is an interface for errors to implement to indicate whether or
|
// Recoverable is an interface for errors to implement to indicate whether or
|
||||||
// not they are fatal or recoverable.
|
// not they are fatal or recoverable.
|
||||||
type Recoverable interface {
|
type Recoverable interface {
|
||||||
|
|
|
@ -371,6 +371,32 @@ dashes (`-`) are converted to underscores (`_`) in environment variables so
|
||||||
- Prior to Consul 1.9, the Envoy sidecar proxy will drop and stop accepting
|
- Prior to Consul 1.9, the Envoy sidecar proxy will drop and stop accepting
|
||||||
connections while the Nomad agent is restarting.
|
connections while the Nomad agent is restarting.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
If the sidecar service is not running correctly, you can investigate
|
||||||
|
potential `envoy` failures in the following ways:
|
||||||
|
|
||||||
|
* Task logs in the associated `connect-*` task
|
||||||
|
* Task secrets (may contain sensitive information):
|
||||||
|
* envoy CLI command: `secrets/.envoy_bootstrap.cmd`
|
||||||
|
* environment variables: `secrets/.envoy_bootstrap.env`
|
||||||
|
* An extra allocation log file: `alloc/logs/envoy_bootstrap.stderr.0`
|
||||||
|
|
||||||
|
For example, with an allocation ID starting with `b36a`:
|
||||||
|
|
||||||
|
```shell-session
|
||||||
|
nomad alloc status -short b36a # to get the connect-* task name
|
||||||
|
nomad alloc logs -task connect-proxy-count-api -stderr b36a
|
||||||
|
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.cmd
|
||||||
|
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.env
|
||||||
|
nomad alloc fs b36a alloc/logs/envoy_bootstrap.stderr.0
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: If the alloc is unable to start successfully, debugging files may
|
||||||
|
only be accessible from the host filesystem. However, the sidecar task secrets
|
||||||
|
directory may not be available on systems where it is mounted in a temporary
|
||||||
|
filesystem.
|
||||||
|
|
||||||
[count-dashboard]: /img/count-dashboard.png
|
[count-dashboard]: /img/count-dashboard.png
|
||||||
[consul_acl]: https://github.com/hashicorp/consul/issues/7414
|
[consul_acl]: https://github.com/hashicorp/consul/issues/7414
|
||||||
[gh-9907]: https://github.com/hashicorp/nomad/issues/9907
|
[gh-9907]: https://github.com/hashicorp/nomad/issues/9907
|
||||||
|
|
Loading…
Reference in New Issue