From 2965dc6a1adaca490466b329f0aed5dc874182df Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 3 May 2022 15:38:32 -0700 Subject: [PATCH] artifact: fix numerous go-getter security issues Fix numerous go-getter security issues: - Add timeouts to http, git, and hg operations to prevent DoS - Add size limit to http to prevent resource exhaustion - Disable following symlinks in both artifacts and `job run` - Stop performing initial HEAD request to avoid file corruption on retries and DoS opportunities. **Approach** Since Nomad has no ability to differentiate a DoS-via-large-artifact vs a legitimate workload, all of the new limits are configurable at the client agent level. The max size of HTTP downloads is also exposed as a node attribute so that if some workloads have large artifacts they can specify a high limit in their jobspecs. In the future all of this plumbing could be extended to enable/disable specific getters or artifact downloading entirely on a per-node basis. --- .changelog/13057.txt | 3 + client/allocrunner/alloc_runner.go | 5 + client/allocrunner/config.go | 3 + .../allocrunner/taskrunner/artifact_hook.go | 8 +- .../taskrunner/artifact_hook_test.go | 9 +- .../allocrunner/taskrunner/getter/getter.go | 147 +++++--- .../taskrunner/getter/getter_test.go | 73 +++- .../allocrunner/taskrunner/getter/testing.go | 18 + client/allocrunner/taskrunner/task_runner.go | 7 + .../taskrunner/task_runner_hooks.go | 2 +- .../taskrunner/task_runner_test.go | 2 + client/allocrunner/testing.go | 3 + client/client.go | 8 + client/config/artifact.go | 74 ++++ client/config/artifact_test.go | 155 ++++++++ client/config/config.go | 4 + client/interfaces/client.go | 12 + command/agent/agent.go | 6 + command/agent/command.go | 5 + command/agent/command_test.go | 14 + command/agent/config.go | 6 + command/helpers.go | 3 + go.mod | 4 +- go.sum | 8 +- nomad/structs/config/artifact.go | 186 +++++++++ nomad/structs/config/artifact_test.go | 352 ++++++++++++++++++ website/content/docs/configuration/client.mdx | 29 ++ .../docs/job-specification/artifact.mdx | 10 + .../content/docs/upgrade/upgrade-specific.mdx | 13 + 29 files changed, 1092 insertions(+), 77 deletions(-) create mode 100644 .changelog/13057.txt create mode 100644 client/allocrunner/taskrunner/getter/testing.go create mode 100644 client/config/artifact.go create mode 100644 client/config/artifact_test.go create mode 100644 nomad/structs/config/artifact.go create mode 100644 nomad/structs/config/artifact_test.go diff --git a/.changelog/13057.txt b/.changelog/13057.txt new file mode 100644 index 000000000..2eac63dcb --- /dev/null +++ b/.changelog/13057.txt @@ -0,0 +1,3 @@ +```release-note:security +A vulnerability was identified in the go-getter library that Nomad uses for its artifacts such that a specially crafted Nomad jobspec can be used for privilege escalation onto client agent hosts. [CVE-2022-30324](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-30324) +``` diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 32b8d4e02..c5294e223 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -182,6 +182,9 @@ type allocRunner struct { // serviceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. serviceRegWrapper *wrapper.HandlerWrapper + + // getter is an interface for retrieving artifacts. 
+ getter cinterfaces.ArtifactGetter } // RPCer is the interface needed by hooks to make RPC calls. @@ -226,6 +229,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { serversContactedCh: config.ServersContactedCh, rpcClient: config.RPCClient, serviceRegWrapper: config.ServiceRegWrapper, + getter: config.Getter, } // Create the logger based on the allocation ID @@ -280,6 +284,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { StartConditionMetCtx: ar.taskHookCoordinator.startConditionForTask(task), ShutdownDelayCtx: ar.shutdownDelayCtx, ServiceRegWrapper: ar.serviceRegWrapper, + Getter: ar.getter, } if ar.cpusetManager != nil { diff --git a/client/allocrunner/config.go b/client/allocrunner/config.go index 0ec3ba51c..99d0490ef 100644 --- a/client/allocrunner/config.go +++ b/client/allocrunner/config.go @@ -86,4 +86,7 @@ type Config struct { // ServiceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. ServiceRegWrapper *wrapper.HandlerWrapper + + // Getter is an interface for retrieving artifacts. + Getter interfaces.ArtifactGetter } diff --git a/client/allocrunner/taskrunner/artifact_hook.go b/client/allocrunner/taskrunner/artifact_hook.go index 627ee6e42..dae238ecf 100644 --- a/client/allocrunner/taskrunner/artifact_hook.go +++ b/client/allocrunner/taskrunner/artifact_hook.go @@ -7,8 +7,8 @@ import ( log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" + ci "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/nomad/structs" ) @@ -16,11 +16,13 @@ import ( type artifactHook struct { eventEmitter ti.EventEmitter logger log.Logger + getter ci.ArtifactGetter } -func newArtifactHook(e ti.EventEmitter, logger log.Logger) *artifactHook { +func newArtifactHook(e ti.EventEmitter, getter ci.ArtifactGetter, logger log.Logger) *artifactHook { h := &artifactHook{ eventEmitter: e, + getter: getter, } h.logger = logger.Named(h.Name()) return h @@ -40,7 +42,7 @@ func (h *artifactHook) doWork(req *interfaces.TaskPrestartRequest, resp *interfa h.logger.Debug("downloading artifact", "artifact", artifact.GetterSource, "aid", aid) //XXX add ctx to GetArtifact to allow cancelling long downloads - if err := getter.GetArtifact(req.TaskEnv, artifact); err != nil { + if err := h.getter.GetArtifact(req.TaskEnv, artifact); err != nil { wrapped := structs.NewRecoverableError( fmt.Errorf("failed to download artifact %q: %v", artifact.GetterSource, err), diff --git a/client/allocrunner/taskrunner/artifact_hook_test.go b/client/allocrunner/taskrunner/artifact_hook_test.go index 2ddf3d4c9..1bf950616 100644 --- a/client/allocrunner/taskrunner/artifact_hook_test.go +++ b/client/allocrunner/taskrunner/artifact_hook_test.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/testlog" @@ -38,7 +39,7 @@ func TestTaskRunner_ArtifactHook_Recoverable(t *testing.T) { ci.Parallel(t) me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), 
testlog.HCLogger(t)) req := &interfaces.TaskPrestartRequest{ TaskEnv: taskenv.NewEmptyTaskEnv(), @@ -71,7 +72,7 @@ func TestTaskRunner_ArtifactHook_PartialDone(t *testing.T) { ci.Parallel(t) me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory with 1 of the 2 artifacts srcdir := t.TempDir() @@ -159,7 +160,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadSuccess(t *testing.T) { t.Parallel() me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory all 7 artifacts srcdir := t.TempDir() @@ -246,7 +247,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadFailure(t *testing.T) { t.Parallel() me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory with 3 of the 4 artifacts srcdir := t.TempDir() diff --git a/client/allocrunner/taskrunner/getter/getter.go b/client/allocrunner/taskrunner/getter/getter.go index 4bbf7674c..f1f42b1ce 100644 --- a/client/allocrunner/taskrunner/getter/getter.go +++ b/client/allocrunner/taskrunner/getter/getter.go @@ -10,63 +10,132 @@ import ( "github.com/hashicorp/go-cleanhttp" gg "github.com/hashicorp/go-getter" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/nomad/structs" ) -// httpClient is a shared HTTP client for use across all http/https Getter -// instantiations. The HTTP client is designed to be thread-safe, and using a pooled -// transport will help reduce excessive connections when clients are downloading lots -// of artifacts. -var httpClient = &http.Client{ - Transport: cleanhttp.DefaultPooledTransport(), -} - const ( // gitSSHPrefix is the prefix for downloading via git using ssh gitSSHPrefix = "git@github.com:" ) -// EnvReplacer is an interface which can interpolate environment variables and -// is usually satisfied by taskenv.TaskEnv. -type EnvReplacer interface { - ReplaceEnv(string) string - ClientPath(string, bool) (string, bool) +// Getter wraps go-getter calls in an artifact configuration. +type Getter struct { + // httpClient is a shared HTTP client for use across all http/https + // Getter instantiations. The HTTP client is designed to be + // thread-safe, and using a pooled transport will help reduce excessive + // connections when clients are downloading lots of artifacts. + httpClient *http.Client + config *config.ArtifactConfig +} + +// NewGetter returns a new Getter instance. This function is called once per +// client and shared across alloc and task runners. +func NewGetter(config *config.ArtifactConfig) *Getter { + return &Getter{ + httpClient: &http.Client{ + Transport: cleanhttp.DefaultPooledTransport(), + }, + config: config, + } +} + +// GetArtifact downloads an artifact into the specified task directory. 
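+// It interpolates the artifact source and destination with the task's
+// environment, rejects destinations that escape the task sandbox, maps the
+// artifact's getter mode onto the go-getter client mode, and then downloads
+// with a go-getter client configured with the agent's artifact limits.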
+func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) error { + ggURL, err := getGetterUrl(taskEnv, artifact) + if err != nil { + return newGetError(artifact.GetterSource, err, false) + } + + dest, escapes := taskEnv.ClientPath(artifact.RelativeDest, true) + // Verify the destination is still in the task sandbox after interpolation + if escapes { + return newGetError(artifact.RelativeDest, + errors.New("artifact destination path escapes the alloc directory"), + false) + } + + // Convert from string getter mode to go-getter const + mode := gg.ClientModeAny + switch artifact.GetterMode { + case structs.GetterModeFile: + mode = gg.ClientModeFile + case structs.GetterModeDir: + mode = gg.ClientModeDir + } + + headers := getHeaders(taskEnv, artifact.GetterHeaders) + if err := g.getClient(ggURL, headers, mode, dest).Get(); err != nil { + return newGetError(ggURL, err, true) + } + + return nil } // getClient returns a client that is suitable for Nomad downloading artifacts. -func getClient(src string, headers http.Header, mode gg.ClientMode, dst string) *gg.Client { +func (g *Getter) getClient(src string, headers http.Header, mode gg.ClientMode, dst string) *gg.Client { return &gg.Client{ Src: src, Dst: dst, Mode: mode, Umask: 060000000, - Getters: createGetters(headers), + Getters: g.createGetters(headers), + + // This will prevent copying or writing files through symlinks + DisableSymlinks: true, } } -func createGetters(header http.Header) map[string]gg.Getter { +func (g *Getter) createGetters(header http.Header) map[string]gg.Getter { httpGetter := &gg.HttpGetter{ Netrc: true, - Client: httpClient, + Client: g.httpClient, Header: header, + + // Do not support the custom X-Terraform-Get header and + // associated logic. + XTerraformGetDisabled: true, + + // Disable HEAD requests as they can produce corrupt files when + // retrying a download of a resource that has changed. + // hashicorp/go-getter#219 + DoNotCheckHeadFirst: true, + + // Read timeout for HTTP operations. Must be long enough to + // accommodate large/slow downloads. + ReadTimeout: g.config.HTTPReadTimeout, + + // Maximum download size. Must be large enough to accommodate + // large downloads. + MaxBytes: g.config.HTTPMaxBytes, } + // Explicitly create fresh set of supported Getter for each Client, because // go-getter is not thread-safe. Use a shared HTTP client for http/https Getter, // with pooled transport which is thread-safe. // // If a getter type is not listed here, it is not supported (e.g. file). return map[string]gg.Getter{ - "git": new(gg.GitGetter), - "gcs": new(gg.GCSGetter), - "hg": new(gg.HgGetter), - "s3": new(gg.S3Getter), + "git": &gg.GitGetter{ + Timeout: g.config.GitTimeout, + }, + "hg": &gg.HgGetter{ + Timeout: g.config.HgTimeout, + }, + "gcs": &gg.GCSGetter{ + Timeout: g.config.GCSTimeout, + }, + "s3": &gg.S3Getter{ + Timeout: g.config.S3Timeout, + }, "http": httpGetter, "https": httpGetter, } } // getGetterUrl returns the go-getter URL to download the artifact. 
-func getGetterUrl(taskEnv EnvReplacer, artifact *structs.TaskArtifact) (string, error) { +func getGetterUrl(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) (string, error) { source := taskEnv.ReplaceEnv(artifact.GetterSource) // Handle an invalid URL when given a go-getter url such as @@ -98,7 +167,7 @@ func getGetterUrl(taskEnv EnvReplacer, artifact *structs.TaskArtifact) (string, return ggURL, nil } -func getHeaders(env EnvReplacer, m map[string]string) http.Header { +func getHeaders(env interfaces.EnvReplacer, m map[string]string) http.Header { if len(m) == 0 { return nil } @@ -110,38 +179,6 @@ func getHeaders(env EnvReplacer, m map[string]string) http.Header { return headers } -// GetArtifact downloads an artifact into the specified task directory. -func GetArtifact(taskEnv EnvReplacer, artifact *structs.TaskArtifact) error { - ggURL, err := getGetterUrl(taskEnv, artifact) - if err != nil { - return newGetError(artifact.GetterSource, err, false) - } - - dest, escapes := taskEnv.ClientPath(artifact.RelativeDest, true) - // Verify the destination is still in the task sandbox after interpolation - if escapes { - return newGetError(artifact.RelativeDest, - errors.New("artifact destination path escapes the alloc directory"), - false) - } - - // Convert from string getter mode to go-getter const - mode := gg.ClientModeAny - switch artifact.GetterMode { - case structs.GetterModeFile: - mode = gg.ClientModeFile - case structs.GetterModeDir: - mode = gg.ClientModeDir - } - - headers := getHeaders(taskEnv, artifact.GetterHeaders) - if err := getClient(ggURL, headers, mode, dest).Get(); err != nil { - return newGetError(ggURL, err, true) - } - - return nil -} - // GetError wraps the underlying artifact fetching error with the URL. It // implements the RecoverableError interface. 
type GetError struct { diff --git a/client/allocrunner/taskrunner/getter/getter_test.go b/client/allocrunner/taskrunner/getter/getter_test.go index f9824ceca..6e23d49d0 100644 --- a/client/allocrunner/taskrunner/getter/getter_test.go +++ b/client/allocrunner/taskrunner/getter/getter_test.go @@ -13,7 +13,11 @@ import ( "runtime" "strings" "testing" + "time" + gg "github.com/hashicorp/go-getter" + clientconfig "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/mock" @@ -46,7 +50,7 @@ func (r noopReplacer) ClientPath(p string, join bool) (string, bool) { return path, escapes } -func noopTaskEnv(taskDir string) EnvReplacer { +func noopTaskEnv(taskDir string) interfaces.EnvReplacer { return noopReplacer{ taskDir: taskDir, } @@ -67,6 +71,51 @@ func (u upperReplacer) ClientPath(p string, join bool) (string, bool) { return path, escapes } +func TestGetter_getClient(t *testing.T) { + getter := NewGetter(&clientconfig.ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 100_000, + GCSTimeout: 1 * time.Minute, + GitTimeout: 2 * time.Minute, + HgTimeout: 3 * time.Minute, + S3Timeout: 4 * time.Minute, + }) + client := getter.getClient("src", nil, gg.ClientModeAny, "dst") + + t.Run("check symlink config", func(t *testing.T) { + require.True(t, client.DisableSymlinks) + }) + + t.Run("check http config", func(t *testing.T) { + require.True(t, client.Getters["http"].(*gg.HttpGetter).XTerraformGetDisabled) + require.Equal(t, time.Minute, client.Getters["http"].(*gg.HttpGetter).ReadTimeout) + require.Equal(t, int64(100_000), client.Getters["http"].(*gg.HttpGetter).MaxBytes) + }) + + t.Run("check https config", func(t *testing.T) { + require.True(t, client.Getters["https"].(*gg.HttpGetter).XTerraformGetDisabled) + require.Equal(t, time.Minute, client.Getters["https"].(*gg.HttpGetter).ReadTimeout) + require.Equal(t, int64(100_000), client.Getters["https"].(*gg.HttpGetter).MaxBytes) + }) + + t.Run("check gcs config", func(t *testing.T) { + require.Equal(t, client.Getters["gcs"].(*gg.GCSGetter).Timeout, 1*time.Minute) + }) + + t.Run("check git config", func(t *testing.T) { + require.Equal(t, client.Getters["git"].(*gg.GitGetter).Timeout, 2*time.Minute) + }) + + t.Run("check hg config", func(t *testing.T) { + require.Equal(t, client.Getters["hg"].(*gg.HgGetter).Timeout, 3*time.Minute) + }) + + t.Run("check s3 config", func(t *testing.T) { + require.Equal(t, client.Getters["s3"].(*gg.S3Getter).Timeout, 4*time.Minute) + }) + +} + func TestGetArtifact_getHeaders(t *testing.T) { t.Run("nil", func(t *testing.T) { require.Nil(t, getHeaders(noopTaskEnv(""), nil)) @@ -118,10 +167,12 @@ func TestGetArtifact_Headers(t *testing.T) { } // Download the artifact. + getter := TestDefaultGetter(t) taskEnv := upperReplacer{ taskDir: taskDir, } - err := GetArtifact(taskEnv, artifact) + + err := getter.GetArtifact(taskEnv, artifact) require.NoError(t, err) // Verify artifact exists. 
@@ -151,7 +202,8 @@ func TestGetArtifact_FileAndChecksum(t *testing.T) { } // Download the artifact - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -181,7 +233,8 @@ func TestGetArtifact_File_RelativeDest(t *testing.T) { } // Download the artifact - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -211,7 +264,8 @@ func TestGetArtifact_File_EscapeDest(t *testing.T) { } // attempt to download the artifact - err := GetArtifact(noopTaskEnv(taskDir), artifact) + getter := TestDefaultGetter(t) + err := getter.GetArtifact(noopTaskEnv(taskDir), artifact) if err == nil || !strings.Contains(err.Error(), "escapes") { t.Fatalf("expected GetArtifact to disallow sandbox escape: %v", err) } @@ -257,7 +311,8 @@ func TestGetArtifact_InvalidChecksum(t *testing.T) { } // Download the artifact and expect an error - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err == nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err == nil { t.Fatalf("GetArtifact should have failed") } } @@ -318,7 +373,8 @@ func TestGetArtifact_Archive(t *testing.T) { }, } - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -349,7 +405,8 @@ func TestGetArtifact_Setuid(t *testing.T) { }, } - require.NoError(t, GetArtifact(noopTaskEnv(taskDir), artifact)) + getter := TestDefaultGetter(t) + require.NoError(t, getter.GetArtifact(noopTaskEnv(taskDir), artifact)) var expected map[string]int diff --git a/client/allocrunner/taskrunner/getter/testing.go b/client/allocrunner/taskrunner/getter/testing.go new file mode 100644 index 000000000..c30b7fc5a --- /dev/null +++ b/client/allocrunner/taskrunner/getter/testing.go @@ -0,0 +1,18 @@ +//go:build !release +// +build !release + +package getter + +import ( + "testing" + + clientconfig "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/nomad/structs/config" + "github.com/stretchr/testify/require" +) + +func TestDefaultGetter(t *testing.T) *Getter { + getterConf, err := clientconfig.ArtifactConfigFromAgent(config.DefaultArtifactConfig()) + require.NoError(t, err) + return NewGetter(getterConf) +} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 35625198b..f5342962b 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -244,6 +244,9 @@ type TaskRunner struct { // serviceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. serviceRegWrapper *wrapper.HandlerWrapper + + // getter is an interface for retrieving artifacts. + getter cinterfaces.ArtifactGetter } type Config struct { @@ -309,6 +312,9 @@ type Config struct { // ServiceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. ServiceRegWrapper *wrapper.HandlerWrapper + + // Getter is an interface for retrieving artifacts. 
+ Getter cinterfaces.ArtifactGetter } func NewTaskRunner(config *Config) (*TaskRunner, error) { @@ -367,6 +373,7 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) { shutdownDelayCtx: config.ShutdownDelayCtx, shutdownDelayCancelFn: config.ShutdownDelayCancelFn, serviceRegWrapper: config.ServiceRegWrapper, + getter: config.Getter, } // Create the logger based on the allocation ID diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 63b1c0071..0789dd184 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -64,7 +64,7 @@ func (tr *TaskRunner) initHooks() { newLogMonHook(tr, hookLogger), newDispatchHook(alloc, hookLogger), newVolumeHook(tr, hookLogger), - newArtifactHook(tr, hookLogger), + newArtifactHook(tr, tr.getter, hookLogger), newStatsHook(tr, tr.clientConfig.StatsCollectionInterval, hookLogger), newDeviceHook(tr.devicemanager, hookLogger), } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index bfedf6396..845ae4e1c 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/config" consulapi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" @@ -145,6 +146,7 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri ShutdownDelayCtx: shutdownDelayCtx, ShutdownDelayCancelFn: shutdownDelayCancelFn, ServiceRegWrapper: wrapperMock, + Getter: getter.TestDefaultGetter(t), } // Set the cgroup path getter if we are in v2 mode diff --git a/client/allocrunner/testing.go b/client/allocrunner/testing.go index c369c4fe1..b45ef6046 100644 --- a/client/allocrunner/testing.go +++ b/client/allocrunner/testing.go @@ -7,6 +7,7 @@ import ( "sync" "testing" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/allocwatcher" clientconfig "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/consul" @@ -83,7 +84,9 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*Config, fu CpusetManager: new(cgutil.NoopCpusetManager), ServersContactedCh: make(chan struct{}), ServiceRegWrapper: wrapper.NewHandlerWrapper(clientConf.Logger, consulRegMock, nomadRegMock), + Getter: getter.TestDefaultGetter(t), } + return conf, cleanup } diff --git a/client/client.go b/client/client.go index 07beb2c11..a0a207043 100644 --- a/client/client.go +++ b/client/client.go @@ -23,12 +23,14 @@ import ( "github.com/hashicorp/nomad/client/allocrunner" "github.com/hashicorp/nomad/client/allocrunner/interfaces" arstate "github.com/hashicorp/nomad/client/allocrunner/state" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/allocwatcher" "github.com/hashicorp/nomad/client/config" consulApi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" + cinterfaces "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/lib/cgutil" 
"github.com/hashicorp/nomad/client/pluginmanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" @@ -319,6 +321,9 @@ type Client struct { // EnterpriseClient is used to set and check enterprise features for clients EnterpriseClient *EnterpriseClient + + // getter is an interface for retrieving artifacts. + getter cinterfaces.ArtifactGetter } var ( @@ -377,6 +382,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie serversContactedCh: make(chan struct{}), serversContactedOnce: sync.Once{}, cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger), + getter: getter.NewGetter(cfg.Artifact), EnterpriseClient: newEnterpriseClient(logger), } @@ -1170,6 +1176,7 @@ func (c *Client) restoreState() error { ServersContactedCh: c.serversContactedCh, ServiceRegWrapper: c.serviceRegWrapper, RPCClient: c, + Getter: c.getter, } c.configLock.RUnlock() @@ -2507,6 +2514,7 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error DriverManager: c.drivermanager, ServiceRegWrapper: c.serviceRegWrapper, RPCClient: c, + Getter: c.getter, } c.configLock.RUnlock() diff --git a/client/config/artifact.go b/client/config/artifact.go new file mode 100644 index 000000000..4daf9b1dc --- /dev/null +++ b/client/config/artifact.go @@ -0,0 +1,74 @@ +package config + +import ( + "fmt" + "time" + + "github.com/dustin/go-humanize" + "github.com/hashicorp/nomad/nomad/structs/config" +) + +// ArtifactConfig is the internal readonly copy of the client agent's +// ArtifactConfig. +type ArtifactConfig struct { + HTTPReadTimeout time.Duration + HTTPMaxBytes int64 + + GCSTimeout time.Duration + GitTimeout time.Duration + HgTimeout time.Duration + S3Timeout time.Duration +} + +// ArtifactConfigFromAgent creates a new internal readonly copy of the client +// agent's ArtifactConfig. The config should have already been validated. 
+func ArtifactConfigFromAgent(c *config.ArtifactConfig) (*ArtifactConfig, error) { + newConfig := &ArtifactConfig{} + + t, err := time.ParseDuration(*c.HTTPReadTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing HTTPReadTimeout: %w", err) + } + newConfig.HTTPReadTimeout = t + + s, err := humanize.ParseBytes(*c.HTTPMaxSize) + if err != nil { + return nil, fmt.Errorf("error parsing HTTPMaxSize: %w", err) + } + newConfig.HTTPMaxBytes = int64(s) + + t, err = time.ParseDuration(*c.GCSTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing GCSTimeout: %w", err) + } + newConfig.GCSTimeout = t + + t, err = time.ParseDuration(*c.GitTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing GitTimeout: %w", err) + } + newConfig.GitTimeout = t + + t, err = time.ParseDuration(*c.HgTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing HgTimeout: %w", err) + } + newConfig.HgTimeout = t + + t, err = time.ParseDuration(*c.S3Timeout) + if err != nil { + return nil, fmt.Errorf("error parsing S3Timeout: %w", err) + } + newConfig.S3Timeout = t + + return newConfig, nil +} + +func (a *ArtifactConfig) Copy() *ArtifactConfig { + if a == nil { + return nil + } + + newCopy := *a + return &newCopy +} diff --git a/client/config/artifact_test.go b/client/config/artifact_test.go new file mode 100644 index 000000000..0b296f8f8 --- /dev/null +++ b/client/config/artifact_test.go @@ -0,0 +1,155 @@ +package config + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/structs/config" + "github.com/stretchr/testify/require" +) + +func TestArtifactConfigFromAgent(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + config *config.ArtifactConfig + expected *ArtifactConfig + expectedError string + }{ + { + name: "from default", + config: config.DefaultArtifactConfig(), + expected: &ArtifactConfig{ + HTTPReadTimeout: 30 * time.Minute, + HTTPMaxBytes: 100_000_000_000, + GCSTimeout: 30 * time.Minute, + GitTimeout: 30 * time.Minute, + HgTimeout: 30 * time.Minute, + S3Timeout: 30 * time.Minute, + }, + }, + { + name: "invalid http read timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("invalid"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + expectedError: "error parsing HTTPReadTimeout", + }, + { + name: "invalid http max size", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("invalid"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + expectedError: "error parsing HTTPMaxSize", + }, + { + name: "invalid gcs timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("invalid"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + expectedError: "error parsing GCSTimeout", + }, + { + name: "invalid git timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("invalid"), + 
HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + expectedError: "error parsing GitTimeout", + }, + { + name: "invalid hg timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("invalid"), + S3Timeout: helper.StringToPtr("30m"), + }, + expectedError: "error parsing HgTimeout", + }, + { + name: "invalid s3 timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("invalid"), + }, + expectedError: "error parsing S3Timeout", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, err := ArtifactConfigFromAgent(tc.config) + + if tc.expectedError != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectedError) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, got) + } + }) + } +} + +func TestArtifactConfig_Copy(t *testing.T) { + ci.Parallel(t) + + config := &ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 1000, + GCSTimeout: 2 * time.Minute, + GitTimeout: time.Second, + HgTimeout: time.Hour, + S3Timeout: 5 * time.Minute, + } + + // make sure values are copied. + configCopy := config.Copy() + require.Equal(t, config, configCopy) + + // modify copy and make sure original doesn't change. + configCopy.HTTPReadTimeout = 5 * time.Minute + configCopy.HTTPMaxBytes = 2000 + configCopy.GCSTimeout = 5 * time.Second + configCopy.GitTimeout = 3 * time.Second + configCopy.HgTimeout = 2 * time.Hour + configCopy.S3Timeout = 10 * time.Minute + + require.Equal(t, &ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 1000, + GCSTimeout: 2 * time.Minute, + GitTimeout: time.Second, + HgTimeout: time.Hour, + S3Timeout: 5 * time.Minute, + }, config) +} diff --git a/client/config/config.go b/client/config/config.go index c1d59ea6b..c66560419 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -290,6 +290,9 @@ type Config struct { // TemplateDialer is our custom HTTP dialer for consul-template. This is // used for template functions which require access to the Nomad API. TemplateDialer *bufconndialer.BufConnWrapper + + // Artifact configuration from the agent's config file. + Artifact *ArtifactConfig } // ClientTemplateConfig is configuration on the client specific to template @@ -695,6 +698,7 @@ func (c *Config) Copy() *Config { nc.ReservableCores = make([]uint16, len(c.ReservableCores)) copy(nc.ReservableCores, c.ReservableCores) } + nc.Artifact = c.Artifact.Copy() return nc } diff --git a/client/interfaces/client.go b/client/interfaces/client.go index 35f28c321..f3fc4a5a8 100644 --- a/client/interfaces/client.go +++ b/client/interfaces/client.go @@ -24,3 +24,15 @@ type AllocStateHandler interface { type DeviceStatsReporter interface { LatestDeviceResourceStats([]*structs.AllocatedDeviceResource) []*device.DeviceGroupStats } + +// EnvReplacer is an interface which can interpolate environment variables and +// is usually satisfied by taskenv.TaskEnv. +type EnvReplacer interface { + ReplaceEnv(string) string + ClientPath(string, bool) (string, bool) +} + +// ArtifactGetter is an interface satisfied by the helper/getter package. 
+type ArtifactGetter interface { + GetArtifact(taskEnv EnvReplacer, artifact *structs.TaskArtifact) error +} diff --git a/command/agent/agent.go b/command/agent/agent.go index 76472869b..cdfb2135f 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -715,6 +715,12 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { conf.NomadServiceDiscovery = *agentConfig.Client.NomadServiceDiscovery } + artifactConfig, err := clientconfig.ArtifactConfigFromAgent(agentConfig.Client.Artifact) + if err != nil { + return nil, fmt.Errorf("invalid artifact config: %v", err) + } + conf.Artifact = artifactConfig + return conf, nil } diff --git a/command/agent/command.go b/command/agent/command.go index 9ca58ce2c..e6e3a4bdb 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -406,6 +406,11 @@ func (c *Command) IsValidConfig(config, cmdConfig *Config) bool { } } + if err := config.Client.Artifact.Validate(); err != nil { + c.Ui.Error(fmt.Sprintf("client.artifact stanza invalid: %v", err)) + return false + } + if !config.DevMode { // Ensure that we have the directories we need to run. if config.Server.Enabled && config.DataDir == "" { diff --git a/command/agent/command_test.go b/command/agent/command_test.go index c21836a7c..582d72e66 100644 --- a/command/agent/command_test.go +++ b/command/agent/command_test.go @@ -8,11 +8,13 @@ import ( "testing" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/mitchellh/cli" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/version" ) @@ -382,6 +384,18 @@ func TestIsValidConfig(t *testing.T) { }, err: `host_network["test"].reserved_ports "3-2147483647" invalid: port must be < 65536 but found 2147483647`, }, + { + name: "BadArtifact", + conf: Config{ + Client: &ClientConfig{ + Enabled: true, + Artifact: &config.ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("-10m"), + }, + }, + }, + err: "client.artifact stanza invalid: http_read_timeout must be > 0", + }, } for _, tc := range cases { diff --git a/command/agent/config.go b/command/agent/config.go index d233c1b76..50e547592 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -324,6 +324,9 @@ type ClientConfig struct { // correct scheduling decisions on allocations which require this. NomadServiceDiscovery *bool `hcl:"nomad_service_discovery"` + // Artifact contains the configuration for artifacts. 
+ Artifact *config.ArtifactConfig `hcl:"artifact"` + // ExtraKeysHCL is used by hcl to surface unexpected keys ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"` } @@ -974,6 +977,7 @@ func DefaultConfig() *Config { CNIPath: "/opt/cni/bin", CNIConfigDir: "/opt/cni/config", NomadServiceDiscovery: helper.BoolToPtr(true), + Artifact: config.DefaultArtifactConfig(), }, Server: &ServerConfig{ Enabled: false, @@ -1779,6 +1783,8 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { result.CgroupParent = b.CgroupParent } + result.Artifact = a.Artifact.Merge(b.Artifact) + return &result } diff --git a/command/helpers.go b/command/helpers.go index 1d07f9e7e..d8b8ad547 100644 --- a/command/helpers.go +++ b/command/helpers.go @@ -463,6 +463,9 @@ func (j *JobGetter) Get(jpath string) (*api.Job, error) { Src: jpath, Pwd: pwd, Dst: jobFile.Name(), + + // This will prevent copying or writing files through symlinks + DisableSymlinks: true, } if err := client.Get(); err != nil { diff --git a/go.mod b/go.mod index 1eb132da1..0f62fc50b 100644 --- a/go.mod +++ b/go.mod @@ -57,7 +57,7 @@ require ( // versions. github.com/hashicorp/go-discover v0.0.0-20210818145131-c573d69da192 github.com/hashicorp/go-envparse v0.0.0-20180119215841-310ca1881b22 - github.com/hashicorp/go-getter v1.5.11 + github.com/hashicorp/go-getter v1.6.1 github.com/hashicorp/go-hclog v1.2.0 github.com/hashicorp/go-immutable-radix v1.3.1 github.com/hashicorp/go-memdb v1.3.2 @@ -119,7 +119,7 @@ require ( golang.org/x/crypto v0.0.0-20220315160706-3147a52a75dd golang.org/x/net v0.0.0-20220225172249-27dd8689420f golang.org/x/sync v0.0.0-20210220032951-036812b2e83c - golang.org/x/sys v0.0.0-20220315194320-039c03cc5b86 + golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e golang.org/x/time v0.0.0-20220224211638-0e9765cccd65 google.golang.org/grpc v1.45.0 google.golang.org/protobuf v1.27.1 diff --git a/go.sum b/go.sum index d834f9ecc..1a5af15fc 100644 --- a/go.sum +++ b/go.sum @@ -696,8 +696,8 @@ github.com/hashicorp/go-envparse v0.0.0-20180119215841-310ca1881b22 h1:HTmDIaSN9 github.com/hashicorp/go-envparse v0.0.0-20180119215841-310ca1881b22/go.mod h1:/NlxCzN2D4C4L2uDE6ux/h6jM+n98VFQM14nnCIfHJU= github.com/hashicorp/go-gatedio v0.5.0 h1:Jm1X5yP4yCqqWj5L1TgW7iZwCVPGtVc+mro5r/XX7Tg= github.com/hashicorp/go-gatedio v0.5.0/go.mod h1:Lr3t8L6IyxD3DAeaUxGcgl2JnRUpWMCsmBl4Omu/2t4= -github.com/hashicorp/go-getter v1.5.11 h1:wioTuNmaBU3IE9vdFtFMcmZWj0QzLc6DYaP6sNe5onY= -github.com/hashicorp/go-getter v1.5.11/go.mod h1:9i48BP6wpWweI/0/+FBjqLrp9S8XtwUGjiu0QkWHEaY= +github.com/hashicorp/go-getter v1.6.1 h1:NASsgP4q6tL94WH6nJxKWj8As2H/2kop/bB1d8JMyRY= +github.com/hashicorp/go-getter v1.6.1/go.mod h1:IZCrswsZPeWv9IkVnLElzRU/gz/QPi6pZHn4tv6vbwA= github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= github.com/hashicorp/go-hclog v0.8.0/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= @@ -1597,8 +1597,8 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220315194320-039c03cc5b86 
h1:A9i04dxx7Cribqbs8jf3FQLogkL/CV2YN7hj9KWJCkc= -golang.org/x/sys v0.0.0-20220315194320-039c03cc5b86/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e h1:w36l2Uw3dRan1K3TyXriXvY+6T56GNmlKGcqiQUJDfM= +golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= diff --git a/nomad/structs/config/artifact.go b/nomad/structs/config/artifact.go new file mode 100644 index 000000000..732b4ce87 --- /dev/null +++ b/nomad/structs/config/artifact.go @@ -0,0 +1,186 @@ +package config + +import ( + "fmt" + "math" + "time" + + "github.com/dustin/go-humanize" + "github.com/hashicorp/nomad/helper" +) + +// ArtifactConfig is the configuration specific to the Artifact stanza +type ArtifactConfig struct { + // HTTPReadTimeout is the duration in which a download must complete or + // it will be canceled. Defaults to 30m. + HTTPReadTimeout *string `hcl:"http_read_timeout"` + + // HTTPMaxSize is the maximum size of an artifact that will be downloaded. + // Defaults to 100GB. + HTTPMaxSize *string `hcl:"http_max_size"` + + // GCSTimeout is the duration in which a GCS operation must complete or + // it will be canceled. Defaults to 30m. + GCSTimeout *string `hcl:"gcs_timeout"` + + // GitTimeout is the duration in which a git operation must complete or + // it will be canceled. Defaults to 30m. + GitTimeout *string `hcl:"git_timeout"` + + // HgTimeout is the duration in which an hg operation must complete or + // it will be canceled. Defaults to 30m. + HgTimeout *string `hcl:"hg_timeout"` + + // S3Timeout is the duration in which an S3 operation must complete or + // it will be canceled. Defaults to 30m. 
+ S3Timeout *string `hcl:"s3_timeout"` +} + +func (a *ArtifactConfig) Copy() *ArtifactConfig { + if a == nil { + return nil + } + + newCopy := &ArtifactConfig{} + if a.HTTPReadTimeout != nil { + newCopy.HTTPReadTimeout = helper.StringToPtr(*a.HTTPReadTimeout) + } + if a.HTTPMaxSize != nil { + newCopy.HTTPMaxSize = helper.StringToPtr(*a.HTTPMaxSize) + } + if a.GCSTimeout != nil { + newCopy.GCSTimeout = helper.StringToPtr(*a.GCSTimeout) + } + if a.GitTimeout != nil { + newCopy.GitTimeout = helper.StringToPtr(*a.GitTimeout) + } + if a.HgTimeout != nil { + newCopy.HgTimeout = helper.StringToPtr(*a.HgTimeout) + } + if a.S3Timeout != nil { + newCopy.S3Timeout = helper.StringToPtr(*a.S3Timeout) + } + + return newCopy +} + +func (a *ArtifactConfig) Merge(o *ArtifactConfig) *ArtifactConfig { + if a == nil { + return o.Copy() + } + if o == nil { + return a.Copy() + } + + newCopy := a.Copy() + if o.HTTPReadTimeout != nil { + newCopy.HTTPReadTimeout = helper.StringToPtr(*o.HTTPReadTimeout) + } + if o.HTTPMaxSize != nil { + newCopy.HTTPMaxSize = helper.StringToPtr(*o.HTTPMaxSize) + } + if o.GCSTimeout != nil { + newCopy.GCSTimeout = helper.StringToPtr(*o.GCSTimeout) + } + if o.GitTimeout != nil { + newCopy.GitTimeout = helper.StringToPtr(*o.GitTimeout) + } + if o.HgTimeout != nil { + newCopy.HgTimeout = helper.StringToPtr(*o.HgTimeout) + } + if o.S3Timeout != nil { + newCopy.S3Timeout = helper.StringToPtr(*o.S3Timeout) + } + + return newCopy +} + +func (a *ArtifactConfig) Validate() error { + if a == nil { + return fmt.Errorf("artifact must not be nil") + } + + if a.HTTPReadTimeout == nil { + return fmt.Errorf("http_read_timeout must be set") + } + if v, err := time.ParseDuration(*a.HTTPReadTimeout); err != nil { + return fmt.Errorf("http_read_timeout not a valid duration: %w", err) + } else if v < 0 { + return fmt.Errorf("http_read_timeout must be > 0") + } + + if a.HTTPMaxSize == nil { + return fmt.Errorf("http_max_size must be set") + } + if v, err := humanize.ParseBytes(*a.HTTPMaxSize); err != nil { + return fmt.Errorf("http_max_size not a valid size: %w", err) + } else if v > math.MaxInt64 { + return fmt.Errorf("http_max_size must be < %d but found %d", int64(math.MaxInt64), v) + } + + if a.GCSTimeout == nil { + return fmt.Errorf("gcs_timeout must be set") + } + if v, err := time.ParseDuration(*a.GCSTimeout); err != nil { + return fmt.Errorf("gcs_timeout not a valid duration: %w", err) + } else if v < 0 { + return fmt.Errorf("gcs_timeout must be > 0") + } + + if a.GitTimeout == nil { + return fmt.Errorf("git_timeout must be set") + } + if v, err := time.ParseDuration(*a.GitTimeout); err != nil { + return fmt.Errorf("git_timeout not a valid duration: %w", err) + } else if v < 0 { + return fmt.Errorf("git_timeout must be > 0") + } + + if a.HgTimeout == nil { + return fmt.Errorf("hg_timeout must be set") + } + if v, err := time.ParseDuration(*a.HgTimeout); err != nil { + return fmt.Errorf("hg_timeout not a valid duration: %w", err) + } else if v < 0 { + return fmt.Errorf("hg_timeout must be > 0") + } + + if a.S3Timeout == nil { + return fmt.Errorf("s3_timeout must be set") + } + if v, err := time.ParseDuration(*a.S3Timeout); err != nil { + return fmt.Errorf("s3_timeout not a valid duration: %w", err) + } else if v < 0 { + return fmt.Errorf("s3_timeout must be > 0") + } + + return nil +} + +func DefaultArtifactConfig() *ArtifactConfig { + return &ArtifactConfig{ + // Read timeout for HTTP operations. Must be long enough to + // accommodate large/slow downloads. 
+ HTTPReadTimeout: helper.StringToPtr("30m"), + + // Maximum download size. Must be large enough to accommodate + // large downloads. + HTTPMaxSize: helper.StringToPtr("100GB"), + + // Timeout for GCS operations. Must be long enough to + // accommodate large/slow downloads. + GCSTimeout: helper.StringToPtr("30m"), + + // Timeout for Git operations. Must be long enough to + // accommodate large/slow clones. + GitTimeout: helper.StringToPtr("30m"), + + // Timeout for Hg operations. Must be long enough to + // accommodate large/slow clones. + HgTimeout: helper.StringToPtr("30m"), + + // Timeout for S3 operations. Must be long enough to + // accommodate large/slow downloads. + S3Timeout: helper.StringToPtr("30m"), + } +} diff --git a/nomad/structs/config/artifact_test.go b/nomad/structs/config/artifact_test.go new file mode 100644 index 000000000..e8c78d1f6 --- /dev/null +++ b/nomad/structs/config/artifact_test.go @@ -0,0 +1,352 @@ +package config + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" + "github.com/stretchr/testify/require" +) + +func TestArtifactConfig_Copy(t *testing.T) { + ci.Parallel(t) + + a := DefaultArtifactConfig() + b := a.Copy() + require.Equal(t, a, b) + + b.HTTPReadTimeout = helper.StringToPtr("5m") + b.HTTPMaxSize = helper.StringToPtr("2MB") + b.GitTimeout = helper.StringToPtr("3m") + b.HgTimeout = helper.StringToPtr("2m") + require.NotEqual(t, a, b) +} + +func TestArtifactConfig_Merge(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + source *ArtifactConfig + other *ArtifactConfig + expected *ArtifactConfig + }{ + { + name: "merge all fields", + source: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + other: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("5m"), + HTTPMaxSize: helper.StringToPtr("2GB"), + GCSTimeout: helper.StringToPtr("1m"), + GitTimeout: helper.StringToPtr("2m"), + HgTimeout: helper.StringToPtr("3m"), + S3Timeout: helper.StringToPtr("4m"), + }, + expected: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("5m"), + HTTPMaxSize: helper.StringToPtr("2GB"), + GCSTimeout: helper.StringToPtr("1m"), + GitTimeout: helper.StringToPtr("2m"), + HgTimeout: helper.StringToPtr("3m"), + S3Timeout: helper.StringToPtr("4m"), + }, + }, + { + name: "null source", + source: nil, + other: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("5m"), + HTTPMaxSize: helper.StringToPtr("2GB"), + GCSTimeout: helper.StringToPtr("1m"), + GitTimeout: helper.StringToPtr("2m"), + HgTimeout: helper.StringToPtr("3m"), + S3Timeout: helper.StringToPtr("4m"), + }, + expected: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("5m"), + HTTPMaxSize: helper.StringToPtr("2GB"), + GCSTimeout: helper.StringToPtr("1m"), + GitTimeout: helper.StringToPtr("2m"), + HgTimeout: helper.StringToPtr("3m"), + S3Timeout: helper.StringToPtr("4m"), + }, + }, + { + name: "null other", + source: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), + GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + other: nil, + expected: &ArtifactConfig{ + HTTPReadTimeout: helper.StringToPtr("30m"), + HTTPMaxSize: helper.StringToPtr("100GB"), 
+ GCSTimeout: helper.StringToPtr("30m"), + GitTimeout: helper.StringToPtr("30m"), + HgTimeout: helper.StringToPtr("30m"), + S3Timeout: helper.StringToPtr("30m"), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := tc.source.Merge(tc.other) + require.Equal(t, tc.expected, got) + }) + } +} + +func TestArtifactConfig_Validate(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + config func(*ArtifactConfig) + expectedError string + }{ + { + name: "default config is valid", + config: nil, + expectedError: "", + }, + { + name: "missing http read timeout", + config: func(a *ArtifactConfig) { + a.HTTPReadTimeout = nil + }, + expectedError: "http_read_timeout must be set", + }, + { + name: "http read timeout is invalid", + config: func(a *ArtifactConfig) { + a.HTTPReadTimeout = helper.StringToPtr("invalid") + }, + expectedError: "http_read_timeout not a valid duration", + }, + { + name: "http read timeout is empty", + config: func(a *ArtifactConfig) { + a.HTTPReadTimeout = helper.StringToPtr("") + }, + expectedError: "http_read_timeout not a valid duration", + }, + { + name: "http read timeout is zero", + config: func(a *ArtifactConfig) { + a.HTTPReadTimeout = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "http read timeout is negative", + config: func(a *ArtifactConfig) { + a.HTTPReadTimeout = helper.StringToPtr("-10m") + }, + expectedError: "http_read_timeout must be > 0", + }, + { + name: "http max size is missing", + config: func(a *ArtifactConfig) { + a.HTTPMaxSize = nil + }, + expectedError: "http_max_size must be set", + }, + { + name: "http max size is invalid", + config: func(a *ArtifactConfig) { + a.HTTPMaxSize = helper.StringToPtr("invalid") + }, + expectedError: "http_max_size not a valid size", + }, + { + name: "http max size is empty", + config: func(a *ArtifactConfig) { + a.HTTPMaxSize = helper.StringToPtr("") + }, + expectedError: "http_max_size not a valid size", + }, + { + name: "http max size is zero", + config: func(a *ArtifactConfig) { + a.HTTPMaxSize = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "http max size is negative", + config: func(a *ArtifactConfig) { + a.HTTPMaxSize = helper.StringToPtr("-l0MB") + }, + expectedError: "http_max_size not a valid size", + }, + { + name: "gcs timeout is missing", + config: func(a *ArtifactConfig) { + a.GCSTimeout = nil + }, + expectedError: "gcs_timeout must be set", + }, + { + name: "gcs timeout is invalid", + config: func(a *ArtifactConfig) { + a.GCSTimeout = helper.StringToPtr("invalid") + }, + expectedError: "gcs_timeout not a valid duration", + }, + { + name: "gcs timeout is empty", + config: func(a *ArtifactConfig) { + a.GCSTimeout = helper.StringToPtr("") + }, + expectedError: "gcs_timeout not a valid duration", + }, + { + name: "gcs timeout is zero", + config: func(a *ArtifactConfig) { + a.GCSTimeout = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "gcs timeout is negative", + config: func(a *ArtifactConfig) { + a.GCSTimeout = helper.StringToPtr("-l0m") + }, + expectedError: "gcs_timeout not a valid duration", + }, + { + name: "git timeout is missing", + config: func(a *ArtifactConfig) { + a.GitTimeout = nil + }, + expectedError: "git_timeout must be set", + }, + { + name: "git timeout is invalid", + config: func(a *ArtifactConfig) { + a.GitTimeout = helper.StringToPtr("invalid") + }, + expectedError: "git_timeout not a valid duration", + }, + { + name: "git timeout is empty", + config: func(a 
*ArtifactConfig) { + a.GitTimeout = helper.StringToPtr("") + }, + expectedError: "git_timeout not a valid duration", + }, + { + name: "git timeout is zero", + config: func(a *ArtifactConfig) { + a.GitTimeout = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "git timeout is negative", + config: func(a *ArtifactConfig) { + a.GitTimeout = helper.StringToPtr("-l0m") + }, + expectedError: "git_timeout not a valid duration", + }, + { + name: "hg timeout is missing", + config: func(a *ArtifactConfig) { + a.HgTimeout = nil + }, + expectedError: "hg_timeout must be set", + }, + { + name: "hg timeout is invalid", + config: func(a *ArtifactConfig) { + a.HgTimeout = helper.StringToPtr("invalid") + }, + expectedError: "hg_timeout not a valid duration", + }, + { + name: "hg timeout is empty", + config: func(a *ArtifactConfig) { + a.HgTimeout = helper.StringToPtr("") + }, + expectedError: "hg_timeout not a valid duration", + }, + { + name: "hg timeout is zero", + config: func(a *ArtifactConfig) { + a.HgTimeout = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "hg timeout is negative", + config: func(a *ArtifactConfig) { + a.HgTimeout = helper.StringToPtr("-l0m") + }, + expectedError: "hg_timeout not a valid duration", + }, + { + name: "s3 timeout is missing", + config: func(a *ArtifactConfig) { + a.S3Timeout = nil + }, + expectedError: "s3_timeout must be set", + }, + { + name: "s3 timeout is invalid", + config: func(a *ArtifactConfig) { + a.S3Timeout = helper.StringToPtr("invalid") + }, + expectedError: "s3_timeout not a valid duration", + }, + { + name: "s3 timeout is empty", + config: func(a *ArtifactConfig) { + a.S3Timeout = helper.StringToPtr("") + }, + expectedError: "s3_timeout not a valid duration", + }, + { + name: "s3 timeout is zero", + config: func(a *ArtifactConfig) { + a.S3Timeout = helper.StringToPtr("0") + }, + expectedError: "", + }, + { + name: "s3 timeout is negative", + config: func(a *ArtifactConfig) { + a.S3Timeout = helper.StringToPtr("-l0m") + }, + expectedError: "s3_timeout not a valid duration", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + a := DefaultArtifactConfig() + if tc.config != nil { + tc.config(a) + } + + err := a.Validate() + if tc.expectedError != "" { + require.Error(t, err) + require.ErrorContains(t, err, tc.expectedError) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx index 9a71fd473..26e4581eb 100644 --- a/website/content/docs/configuration/client.mdx +++ b/website/content/docs/configuration/client.mdx @@ -142,6 +142,10 @@ client { - `bridge_network_subnet` `(string: "172.26.64.0/20")` - Specifies the subnet which the client will use to allocate IP addresses from. +- `artifact` ([Artifact](#artifact-parameters): varied) - + Specifies controls on the behavior of task + [`artifact`](/docs/job-specification/artifact) stanzas. + - `template` ([Template](#template-parameters): nil) - Specifies controls on the behavior of task [`template`](/docs/job-specification/template) stanzas. @@ -206,6 +210,31 @@ chroot as doing so would cause infinite recursion. reserve on all fingerprinted network devices. Ranges can be specified by using a hyphen separated the two inclusive ends. +### `artifact` Parameters + +- `http_read_timeout` `(string: "30m")` - Specifies the maximum duration in + which an HTTP download request must complete before it is canceled. Set to + `0` to not enforce a limit. 
+
+- `http_max_size` `(string: "100GB")` - Specifies the maximum size allowed for
+  artifacts downloaded via HTTP. Set to `0` to not enforce a limit.
+
+- `gcs_timeout` `(string: "30m")` - Specifies the maximum duration in which a
+  Google Cloud Storage operation must complete before it is canceled. Set to
+  `0` to not enforce a limit.
+
+- `git_timeout` `(string: "30m")` - Specifies the maximum duration in which a
+  Git operation must complete before it is canceled. Set to `0` to not enforce
+  a limit.
+
+- `hg_timeout` `(string: "30m")` - Specifies the maximum duration in which a
+  Mercurial operation must complete before it is canceled. Set to `0` to not
+  enforce a limit.
+
+- `s3_timeout` `(string: "30m")` - Specifies the maximum duration in which an
+  S3 operation must complete before it is canceled. Set to `0` to not enforce a
+  limit.
+
 ### `template` Parameters
 
 - `function_denylist` `([]string: ["plugin", "writeToFile"])` - Specifies a
diff --git a/website/content/docs/job-specification/artifact.mdx b/website/content/docs/job-specification/artifact.mdx
index fc3b62713..2a5e1a843 100644
--- a/website/content/docs/job-specification/artifact.mdx
+++ b/website/content/docs/job-specification/artifact.mdx
@@ -62,6 +62,15 @@ automatically unarchived before the starting the task.
 - `source` `(string: <required>)` - Specifies the URL of the artifact to download. See
   [`go-getter`][go-getter] for details.
 
+## Operation Limits
+
+The client [`artifact`][client_artifact] configuration can set limits on
+specific artifact operations to prevent excessive data download or operation
+time.
+
+If a task's `artifact` retrieval exceeds one of those limits, the task will be
+interrupted and fail to start. Refer to the task events for more information.
+
 ## `artifact` Examples
 
 The following examples only show the `artifact` stanzas. Remember that the
@@ -235,6 +244,7 @@ artifact {
 }
 ```
 
+[client_artifact]: /docs/configuration/client#artifact-parameters
 [go-getter]: https://github.com/hashicorp/go-getter 'HashiCorp go-getter Library'
 [go-getter-headers]: https://github.com/hashicorp/go-getter#headers 'HashiCorp go-getter Headers'
 [minio]: https://www.minio.io/
diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx
index 17533a45c..5b9dc4402 100644
--- a/website/content/docs/upgrade/upgrade-specific.mdx
+++ b/website/content/docs/upgrade/upgrade-specific.mdx
@@ -22,6 +22,18 @@ upgrade. However, specific versions of Nomad may have more details provided for
 their upgrades as a result of new features or changed behavior. This page is
 used to document those details separately from the standard upgrade flow.
 
+## Nomad 1.3.1, 1.2.8, 1.1.14
+
+#### Default `artifact` limits
+
+Nomad 1.3.1, 1.2.8, and 1.1.14 introduced mechanisms to limit the size of
+`artifact` downloads and how long these operations can take. The limits are
+defined in the new [`artifact` client configuration][client_artifact] and have
+predefined default values.
+
+While the default values are fairly large, it is recommended to double-check
+them prior to upgrading your Nomad clients to make sure they fit your needs.
+
 ## Nomad 1.3.0
 
 #### Raft Protocol Version 2 Deprecation
@@ -1371,6 +1383,7 @@ deleted and then Nomad 0.3.0 can be launched.
[api_jobs_parse]: /api-docs/jobs#parse-job [cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html [cgroup_parent]: /docs/configuration/client#cgroup_parent +[client_artifact]: /docs/configuration/client#artifact-parameters [cores]: /docs/job-specification/resources#cores [dangling-containers]: /docs/drivers/docker#dangling-containers [drain-api]: /api-docs/nodes#drain-node
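To make the new limits concrete, the following is a minimal sketch of a client agent configuration using the `artifact` stanza introduced by this patch. The parameter names and defaults come from `DefaultArtifactConfig` and the `client.mdx` documentation above; the specific values shown are illustrative assumptions rather than recommendations.

```hcl
client {
  enabled = true

  # Tighten the artifact limits added in this patch. Any parameter left out
  # keeps its default (30m timeouts, 100GB maximum HTTP download size), and
  # "0" disables enforcement of that limit entirely.
  artifact {
    http_read_timeout = "10m"
    http_max_size     = "10GB"
    gcs_timeout       = "10m"
    git_timeout       = "5m"
    hg_timeout        = "5m"
    s3_timeout        = "10m"
  }
}
```

Invalid values, such as a negative duration, fail agent startup via `ArtifactConfig.Validate`, as exercised by the `BadArtifact` case in `command_test.go`.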