client: recover from getter panics (#14696)
The artifact getter uses the go-getter library to fetch files from different sources. Any bug in this library that results in a panic can cause the entire Nomad client to crash due to a single file download attempt. This change aims to guard against this types of crashes by recovering from panics when the getter attempts to download an artifact. The resulting panic is converted to an error that is stored as a task event for operator visibility and the panic stack trace is logged to the client's log.
This commit is contained in:
parent
b554f9344a
commit
5c100c0d3d
|
@ -0,0 +1,3 @@
|
|||
```release-note:security
|
||||
client: recover from panics caused by artifact download to prevent the Nomad client from crashing
|
||||
```
|
|
@ -5,10 +5,12 @@ import (
|
|||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
gg "github.com/hashicorp/go-getter"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/interfaces"
|
||||
|
@ -22,6 +24,8 @@ const (
|
|||
|
||||
// Getter wraps go-getter calls in an artifact configuration.
|
||||
type Getter struct {
|
||||
logger hclog.Logger
|
||||
|
||||
// httpClient is a shared HTTP client for use across all http/https
|
||||
// Getter instantiations. The HTTP client is designed to be
|
||||
// thread-safe, and using a pooled transport will help reduce excessive
|
||||
|
@ -32,8 +36,9 @@ type Getter struct {
|
|||
|
||||
// NewGetter returns a new Getter instance. This function is called once per
|
||||
// client and shared across alloc and task runners.
|
||||
func NewGetter(config *config.ArtifactConfig) *Getter {
|
||||
func NewGetter(logger hclog.Logger, config *config.ArtifactConfig) *Getter {
|
||||
return &Getter{
|
||||
logger: logger,
|
||||
httpClient: &http.Client{
|
||||
Transport: cleanhttp.DefaultPooledTransport(),
|
||||
},
|
||||
|
@ -42,7 +47,19 @@ func NewGetter(config *config.ArtifactConfig) *Getter {
|
|||
}
|
||||
|
||||
// GetArtifact downloads an artifact into the specified task directory.
|
||||
func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) error {
|
||||
func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) (returnErr error) {
|
||||
// Recover from panics to avoid crashing the entire Nomad client due to
|
||||
// artifact download failures, such as bugs in go-getter.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
g.logger.Error("panic while downloading artifact",
|
||||
"artifact", artifact.GetterSource,
|
||||
"error", r,
|
||||
"stack", string(debug.Stack()))
|
||||
returnErr = fmt.Errorf("getter panic: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
ggURL, err := getGetterUrl(taskEnv, artifact)
|
||||
if err != nil {
|
||||
return newGetError(artifact.GetterSource, err, false)
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"time"
|
||||
|
||||
gg "github.com/hashicorp/go-getter"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
clientconfig "github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/interfaces"
|
||||
"github.com/hashicorp/nomad/client/taskenv"
|
||||
|
@ -56,6 +57,19 @@ func noopTaskEnv(taskDir string) interfaces.EnvReplacer {
|
|||
}
|
||||
}
|
||||
|
||||
// panicReplacer is a version of taskenv.TaskEnv.ReplaceEnv that panics.
|
||||
type panicReplacer struct{}
|
||||
|
||||
func (panicReplacer) ReplaceEnv(_ string) string {
|
||||
panic("panic")
|
||||
}
|
||||
func (panicReplacer) ClientPath(_ string, _ bool) (string, bool) {
|
||||
panic("panic")
|
||||
}
|
||||
func panicTaskEnv() interfaces.EnvReplacer {
|
||||
return panicReplacer{}
|
||||
}
|
||||
|
||||
// upperReplacer is a version of taskenv.TaskEnv.ReplaceEnv that upper-cases
|
||||
// the given input.
|
||||
type upperReplacer struct {
|
||||
|
@ -72,7 +86,7 @@ func (u upperReplacer) ClientPath(p string, join bool) (string, bool) {
|
|||
}
|
||||
|
||||
func TestGetter_getClient(t *testing.T) {
|
||||
getter := NewGetter(&clientconfig.ArtifactConfig{
|
||||
getter := NewGetter(hclog.NewNullLogger(), &clientconfig.ArtifactConfig{
|
||||
HTTPReadTimeout: time.Minute,
|
||||
HTTPMaxBytes: 100_000,
|
||||
GCSTimeout: 1 * time.Minute,
|
||||
|
@ -436,6 +450,15 @@ func TestGetArtifact_Setuid(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestGetArtifact_handlePanic tests that a panic during the getter execution
|
||||
// does not cause its goroutine to crash.
|
||||
func TestGetArtifact_handlePanic(t *testing.T) {
|
||||
getter := TestDefaultGetter(t)
|
||||
err := getter.GetArtifact(panicTaskEnv(), &structs.TaskArtifact{})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "panic")
|
||||
}
|
||||
|
||||
func TestGetGetterUrl_Queries(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
|
|
|
@ -6,6 +6,7 @@ package getter
|
|||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/go-hclog"
|
||||
clientconfig "github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
@ -14,5 +15,5 @@ import (
|
|||
func TestDefaultGetter(t *testing.T) *Getter {
|
||||
getterConf, err := clientconfig.ArtifactConfigFromAgent(config.DefaultArtifactConfig())
|
||||
require.NoError(t, err)
|
||||
return NewGetter(getterConf)
|
||||
return NewGetter(hclog.NewNullLogger(), getterConf)
|
||||
}
|
||||
|
|
|
@ -396,7 +396,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
|
|||
serversContactedCh: make(chan struct{}),
|
||||
serversContactedOnce: sync.Once{},
|
||||
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger),
|
||||
getter: getter.NewGetter(cfg.Artifact),
|
||||
getter: getter.NewGetter(logger.Named("artifact_getter"), cfg.Artifact),
|
||||
EnterpriseClient: newEnterpriseClient(logger),
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue