From 72cbe53f198049e4c793d572ce7fd928b24ff3fb Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 24 Apr 2023 10:00:27 -0400 Subject: [PATCH] logs: allow disabling log collection in jobspec (#16962) Some Nomad users ship application logs out-of-band via syslog. For these users having `logmon` (and `docker_logger`) running is unnecessary overhead. Allow disabling the logmon and pointing the task's stdout/stderr to /dev/null. This changeset is the first of several incremental improvements to log collection short of full-on logging plugins. The next step will likely be to extend the internal-only task driver configuration so that cluster administrators can turn off log collection for the entire driver. --- Fixes: #11175 Co-authored-by: Thomas Weber --- .changelog/16962.txt | 3 ++ api/tasks.go | 9 +++- client/allocrunner/taskrunner/logmon_hook.go | 43 +++++++++++---- .../taskrunner/logmon_hook_test.go | 52 ++++++++++++++++++- .../taskrunner/logmon_hook_unix_test.go | 4 +- .../taskrunner/task_runner_hooks.go | 2 +- command/agent/job_endpoint.go | 2 + command/agent/job_endpoint_test.go | 8 +++ drivers/docker/driver.go | 14 ++++- drivers/docker/driver_test.go | 33 +++++++++++- jobspec/parse_task.go | 1 + jobspec/parse_test.go | 1 + jobspec/test-fixtures/basic.hcl | 1 + nomad/structs/diff_test.go | 30 +++++++++++ nomad/structs/structs.go | 12 ++++- scheduler/annotate.go | 12 ++++- scheduler/annotate_test.go | 21 ++++++++ scheduler/util.go | 7 +++ website/content/api-docs/client.mdx | 4 +- website/content/api-docs/jobs.mdx | 4 ++ website/content/api-docs/json-jobs.mdx | 3 ++ .../content/docs/job-specification/logs.mdx | 24 ++++++--- 22 files changed, 262 insertions(+), 28 deletions(-) create mode 100644 .changelog/16962.txt diff --git a/.changelog/16962.txt b/.changelog/16962.txt new file mode 100644 index 000000000..10310994b --- /dev/null +++ b/.changelog/16962.txt @@ -0,0 +1,3 @@ +```release-note:improvement +jobspec: Added option for disabling task log collection in the `logs` block +``` diff --git a/api/tasks.go b/api/tasks.go index e928b3a04..a1ee8047b 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -639,14 +639,16 @@ func (g *TaskGroup) AddSpread(s *Spread) *TaskGroup { // LogConfig provides configuration for log rotation type LogConfig struct { - MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"` - MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"` + MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"` + MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"` + Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"` } func DefaultLogConfig() *LogConfig { return &LogConfig{ MaxFiles: pointerOf(10), MaxFileSizeMB: pointerOf(10), + Enabled: pointerOf(true), } } @@ -657,6 +659,9 @@ func (l *LogConfig) Canonicalize() { if l.MaxFileSizeMB == nil { l.MaxFileSizeMB = pointerOf(10) } + if l.Enabled == nil { + l.Enabled = pointerOf(true) + } } // DispatchPayloadConfig configures how a task gets its input from a job dispatch diff --git a/client/allocrunner/taskrunner/logmon_hook.go b/client/allocrunner/taskrunner/logmon_hook.go index 7c510a041..4ce23a250 100644 --- a/client/allocrunner/taskrunner/logmon_hook.go +++ b/client/allocrunner/taskrunner/logmon_hook.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "fmt" + "os" "path/filepath" "runtime" "time" @@ -45,6 +46,7 @@ type logmonHook struct { type logmonHookConfig struct { logDir string + enabled bool stdoutFifo string stderrFifo string } @@ -59,10 +61,19 @@ func newLogMonHook(tr *TaskRunner, logger hclog.Logger) *logmonHook { return hook } -func newLogMonHookConfig(taskName, logDir string) *logmonHookConfig { +func newLogMonHookConfig(taskName string, logCfg *structs.LogConfig, logDir string) *logmonHookConfig { cfg := &logmonHookConfig{ - logDir: logDir, + logDir: logDir, + enabled: logCfg.Enabled, } + + // If logging is disabled configure task's stdout/err to point to devnull + if !logCfg.Enabled { + cfg.stdoutFifo = os.DevNull + cfg.stderrFifo = os.DevNull + return cfg + } + if runtime.GOOS == "windows" { id := uuid.Generate()[:8] cfg.stdoutFifo = fmt.Sprintf("//./pipe/%s-%s.stdout", taskName, id) @@ -105,9 +116,7 @@ func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, func (h *logmonHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { - - if h.isLoggingDisabled() { - h.logger.Debug("logging is disabled by driver") + if !h.isLoggingEnabled() { return nil } @@ -142,14 +151,27 @@ func (h *logmonHook) Prestart(ctx context.Context, } } -func (h *logmonHook) isLoggingDisabled() bool { - ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver) - if !ok { +func (h *logmonHook) isLoggingEnabled() bool { + if !h.config.enabled { + h.logger.Debug("log collection is disabled by task") return false } + // internal plugins have access to a capability to disable logging and + // metrics via a private interface; this is an "experimental" interface and + // currently only the docker driver exposes this to users. + ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver) + if !ok { + return true + } + caps := ic.InternalCapabilities() - return caps.DisableLogCollection + if caps.DisableLogCollection { + h.logger.Debug("log collection is disabled by driver") + return false + } + + return true } func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error { @@ -197,6 +219,9 @@ func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPr } func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error { + if !h.isLoggingEnabled() { + return nil + } // It's possible that Stop was called without calling Prestart on agent // restarts. Attempt to reattach to an existing logmon. diff --git a/client/allocrunner/taskrunner/logmon_hook_test.go b/client/allocrunner/taskrunner/logmon_hook_test.go index 4f4cdb505..bc28b190d 100644 --- a/client/allocrunner/taskrunner/logmon_hook_test.go +++ b/client/allocrunner/taskrunner/logmon_hook_test.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" pstructs "github.com/hashicorp/nomad/plugins/shared/structs" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" "golang.org/x/exp/maps" ) @@ -69,7 +70,7 @@ func TestTaskRunner_LogmonHook_StartStop(t *testing.T) { dir := t.TempDir() - hookConf := newLogMonHookConfig(task.Name, dir) + hookConf := newLogMonHookConfig(task.Name, task.LogConfig, dir) runner := &TaskRunner{logmonHookConfig: hookConf} hook := newLogMonHook(runner, testlog.HCLogger(t)) @@ -103,3 +104,52 @@ func TestTaskRunner_LogmonHook_StartStop(t *testing.T) { } require.NoError(t, hook.Stop(context.Background(), &stopReq, nil)) } + +// TestTaskRunner_LogmonHook_Disabled asserts that no logmon running or expected +// by any of the lifecycle hooks. +func TestTaskRunner_LogmonHook_Disabled(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.LogConfig.Enabled = false + + dir := t.TempDir() + + hookConf := newLogMonHookConfig(task.Name, task.LogConfig, dir) + runner := &TaskRunner{logmonHookConfig: hookConf} + hook := newLogMonHook(runner, testlog.HCLogger(t)) + + req := interfaces.TaskPrestartRequest{Task: task} + resp := interfaces.TaskPrestartResponse{} + + // First prestart should not set reattach key and never be Done. + must.NoError(t, hook.Prestart(context.Background(), &req, &resp)) + t.Cleanup(func() { hook.Stop(context.Background(), nil, nil) }) + + must.False(t, resp.Done) + hookData, ok := resp.State[logmonReattachKey] + must.False(t, ok) + must.Eq(t, "", hookData) + + // Running prestart again should still be a noop + req.PreviousState = map[string]string{} + must.NoError(t, hook.Prestart(context.Background(), &req, &resp)) + + must.False(t, resp.Done) + hookData, ok = resp.State[logmonReattachKey] + must.False(t, ok) + must.Eq(t, "", hookData) + + // PreviousState should always be initialized by the caller, but just + // belt-and-suspenders for this test to ensure we can't panic on this code + // path + req.PreviousState = nil + must.NoError(t, hook.Prestart(context.Background(), &req, &resp)) + + // Running stop should not error even with no running logmon + stopReq := interfaces.TaskStopRequest{ + ExistingState: maps.Clone(resp.State), + } + must.NoError(t, hook.Stop(context.Background(), &stopReq, nil)) +} diff --git a/client/allocrunner/taskrunner/logmon_hook_unix_test.go b/client/allocrunner/taskrunner/logmon_hook_unix_test.go index a828036de..478a7bf7c 100644 --- a/client/allocrunner/taskrunner/logmon_hook_unix_test.go +++ b/client/allocrunner/taskrunner/logmon_hook_unix_test.go @@ -35,7 +35,7 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { dir := t.TempDir() - hookConf := newLogMonHookConfig(task.Name, dir) + hookConf := newLogMonHookConfig(task.Name, task.LogConfig, dir) runner := &TaskRunner{logmonHookConfig: hookConf} hook := newLogMonHook(runner, testlog.HCLogger(t)) @@ -100,7 +100,7 @@ func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) { dir := t.TempDir() - hookConf := newLogMonHookConfig(task.Name, dir) + hookConf := newLogMonHookConfig(task.Name, task.LogConfig, dir) runner := &TaskRunner{logmonHookConfig: hookConf} hook := newLogMonHook(runner, testlog.HCLogger(t)) diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 1a3d844ac..698915878 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -53,7 +53,7 @@ func (tr *TaskRunner) initHooks() { hookLogger := tr.logger.Named("task_hook") task := tr.Task() - tr.logmonHookConfig = newLogMonHookConfig(task.Name, tr.taskDir.LogDir) + tr.logmonHookConfig = newLogMonHookConfig(task.Name, task.LogConfig, tr.taskDir.LogDir) // Add the hook resources tr.hookResources = &hookResources{} diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 098f3c761..5b53673f3 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1245,6 +1245,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup, structsTask.LogConfig = &structs.LogConfig{ MaxFiles: *apiTask.LogConfig.MaxFiles, MaxFileSizeMB: *apiTask.LogConfig.MaxFileSizeMB, + Enabled: *apiTask.LogConfig.Enabled, } if len(apiTask.Artifacts) > 0 { @@ -1809,6 +1810,7 @@ func apiLogConfigToStructs(in *api.LogConfig) *structs.LogConfig { return nil } return &structs.LogConfig{ + Enabled: *in.Enabled, MaxFiles: dereferenceInt(in.MaxFiles), MaxFileSizeMB: dereferenceInt(in.MaxFileSizeMB), } diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 6109af2ba..633f68f93 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -2767,6 +2767,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { KillTimeout: pointer.Of(10 * time.Second), KillSignal: "SIGQUIT", LogConfig: &api.LogConfig{ + Enabled: pointer.Of(true), MaxFiles: pointer.Of(10), MaxFileSizeMB: pointer.Of(100), }, @@ -3184,6 +3185,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { KillTimeout: 10 * time.Second, KillSignal: "SIGQUIT", LogConfig: &structs.LogConfig{ + Enabled: true, MaxFiles: 10, MaxFileSizeMB: 100, }, @@ -3340,6 +3342,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { KillTimeout: pointer.Of(10 * time.Second), KillSignal: "SIGQUIT", LogConfig: &api.LogConfig{ + Enabled: pointer.Of(true), MaxFiles: pointer.Of(10), MaxFileSizeMB: pointer.Of(100), }, @@ -3465,6 +3468,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { KillTimeout: 10 * time.Second, KillSignal: "SIGQUIT", LogConfig: &structs.LogConfig{ + Enabled: true, MaxFiles: 10, MaxFileSizeMB: 100, }, @@ -3637,9 +3641,11 @@ func TestConversion_apiLogConfigToStructs(t *testing.T) { ci.Parallel(t) require.Nil(t, apiLogConfigToStructs(nil)) require.Equal(t, &structs.LogConfig{ + Enabled: true, MaxFiles: 2, MaxFileSizeMB: 8, }, apiLogConfigToStructs(&api.LogConfig{ + Enabled: pointer.Of(true), MaxFiles: pointer.Of(2), MaxFileSizeMB: pointer.Of(8), })) @@ -3737,6 +3743,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) { Meta: meta, KillTimeout: &timeout, LogConfig: &structs.LogConfig{ + Enabled: true, MaxFiles: 2, MaxFileSizeMB: 8, }, @@ -3755,6 +3762,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) { Meta: meta, KillTimeout: &timeout, LogConfig: &api.LogConfig{ + Enabled: pointer.Of(true), MaxFiles: pointer.Of(2), MaxFileSizeMB: pointer.Of(8), }, diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 7bee45562..d0db2840f 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -253,7 +253,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { net: handleState.DriverNetwork, } - if !d.config.DisableLogCollection { + if loggingIsEnabled(d.config, handle.Config) { h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig) if err != nil { d.logger.Warn("failed to reattach to docker logger process", "error", err) @@ -284,6 +284,16 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { return nil } +func loggingIsEnabled(driverCfg *DriverConfig, taskCfg *drivers.TaskConfig) bool { + if driverCfg.DisableLogCollection { + return false + } + if taskCfg.StderrPath == os.DevNull && taskCfg.StdoutPath == os.DevNull { + return false + } + return true +} + func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { if _, ok := d.tasks.Get(cfg.ID); ok { return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) @@ -399,7 +409,7 @@ CREATE: } } - collectingLogs := !d.config.DisableLogCollection + collectingLogs := loggingIsEnabled(d.config, cfg) var dlogger docklog.DockerLogger var pluginClient *plugin.Client diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go index 674f6e8ae..e0a4a623b 100644 --- a/drivers/docker/driver_test.go +++ b/drivers/docker/driver_test.go @@ -133,7 +133,7 @@ func dockerTask(t *testing.T) (*drivers.TaskConfig, *TaskConfig, []int) { func dockerSetup(t *testing.T, task *drivers.TaskConfig, driverCfg map[string]interface{}) (*docker.Client, *dtestutil.DriverHarness, *taskHandle, func()) { client := newTestDockerClient(t) driver := dockerDriverHarness(t, driverCfg) - cleanup := driver.MkAllocDir(task, true) + cleanup := driver.MkAllocDir(task, loggingIsEnabled(&DriverConfig{}, task)) copyImage(t, task.TaskDir(), "busybox.tar") _, _, err := driver.StartTask(task) @@ -841,6 +841,37 @@ func TestDockerDriver_LoggingConfiguration(t *testing.T) { require.Equal(t, loggerConfig, container.HostConfig.LogConfig.Config) } +// TestDockerDriver_LogCollectionDisabled ensures that logmon isn't configured +// when log collection is disable, but out-of-band Docker log shipping still +// works as expected +func TestDockerDriver_LogCollectionDisabled(t *testing.T) { + ci.Parallel(t) + testutil.DockerCompatible(t) + + task, cfg, _ := dockerTask(t) + task.StdoutPath = os.DevNull + task.StderrPath = os.DevNull + + must.NoError(t, task.EncodeConcreteDriverConfig(cfg)) + + dockerClientConfig := make(map[string]interface{}) + loggerConfig := map[string]string{"gelf-address": "udp://1.2.3.4:12201", "tag": "gelf"} + + dockerClientConfig["logging"] = LoggingConfig{ + Type: "gelf", + Config: loggerConfig, + } + client, d, handle, cleanup := dockerSetup(t, task, dockerClientConfig) + t.Cleanup(cleanup) + must.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second)) + container, err := client.InspectContainer(handle.containerID) + must.NoError(t, err) + must.Nil(t, handle.dlogger) + + must.Eq(t, "gelf", container.HostConfig.LogConfig.Type) + must.Eq(t, loggerConfig, container.HostConfig.LogConfig.Config) +} + func TestDockerDriver_HealthchecksDisable(t *testing.T) { ci.Parallel(t) testutil.DockerCompatible(t) diff --git a/jobspec/parse_task.go b/jobspec/parse_task.go index 06d2328a7..f81e78e29 100644 --- a/jobspec/parse_task.go +++ b/jobspec/parse_task.go @@ -262,6 +262,7 @@ func parseTask(item *ast.ObjectItem, keys []string) (*api.Task, error) { valid := []string{ "max_files", "max_file_size", + "enabled", } if err := checkHCLKeys(logsBlock.Val, valid); err != nil { return nil, multierror.Prefix(err, "logs ->") diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index aa801cf05..d9e943d4e 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -350,6 +350,7 @@ func TestParse(t *testing.T) { LogConfig: &api.LogConfig{ MaxFiles: intToPtr(14), MaxFileSizeMB: intToPtr(101), + Enabled: boolToPtr(true), }, Artifacts: []*api.TaskArtifact{ { diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 238b185c4..3c12ea8f6 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -194,6 +194,7 @@ job "binstore-storagelocker" { } logs { + enabled = true max_files = 14 max_file_size = 101 } diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 2b0d67bf3..7b199471f 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -4420,6 +4420,7 @@ func TestTaskDiff(t *testing.T) { LogConfig: &LogConfig{ MaxFiles: 1, MaxFileSizeMB: 10, + Enabled: false, }, }, Expected: &TaskDiff{ @@ -4429,6 +4430,12 @@ func TestTaskDiff(t *testing.T) { Type: DiffTypeAdded, Name: "LogConfig", Fields: []*FieldDiff{ + { + Type: DiffTypeAdded, + Name: "Enabled", + Old: "", + New: "false", + }, { Type: DiffTypeAdded, Name: "MaxFileSizeMB", @@ -4452,6 +4459,7 @@ func TestTaskDiff(t *testing.T) { LogConfig: &LogConfig{ MaxFiles: 1, MaxFileSizeMB: 10, + Enabled: false, }, }, New: &Task{}, @@ -4462,6 +4470,12 @@ func TestTaskDiff(t *testing.T) { Type: DiffTypeDeleted, Name: "LogConfig", Fields: []*FieldDiff{ + { + Type: DiffTypeDeleted, + Name: "Enabled", + Old: "false", + New: "", + }, { Type: DiffTypeDeleted, Name: "MaxFileSizeMB", @@ -4485,12 +4499,14 @@ func TestTaskDiff(t *testing.T) { LogConfig: &LogConfig{ MaxFiles: 1, MaxFileSizeMB: 10, + Enabled: false, }, }, New: &Task{ LogConfig: &LogConfig{ MaxFiles: 2, MaxFileSizeMB: 20, + Enabled: true, }, }, Expected: &TaskDiff{ @@ -4500,6 +4516,12 @@ func TestTaskDiff(t *testing.T) { Type: DiffTypeEdited, Name: "LogConfig", Fields: []*FieldDiff{ + { + Type: DiffTypeEdited, + Name: "Enabled", + Old: "false", + New: "true", + }, { Type: DiffTypeEdited, Name: "MaxFileSizeMB", @@ -4524,12 +4546,14 @@ func TestTaskDiff(t *testing.T) { LogConfig: &LogConfig{ MaxFiles: 1, MaxFileSizeMB: 10, + Enabled: false, }, }, New: &Task{ LogConfig: &LogConfig{ MaxFiles: 1, MaxFileSizeMB: 20, + Enabled: true, }, }, Expected: &TaskDiff{ @@ -4539,6 +4563,12 @@ func TestTaskDiff(t *testing.T) { Type: DiffTypeEdited, Name: "LogConfig", Fields: []*FieldDiff{ + { + Type: DiffTypeEdited, + Name: "Enabled", + Old: "false", + New: "true", + }, { Type: DiffTypeEdited, Name: "MaxFileSizeMB", diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index c19b00f9d..ee4a30345 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -7225,6 +7225,7 @@ const ( type LogConfig struct { MaxFiles int MaxFileSizeMB int + Enabled bool } func (l *LogConfig) Equal(o *LogConfig) bool { @@ -7240,6 +7241,10 @@ func (l *LogConfig) Equal(o *LogConfig) bool { return false } + if l.Enabled != o.Enabled { + return false + } + return true } @@ -7250,6 +7255,7 @@ func (l *LogConfig) Copy() *LogConfig { return &LogConfig{ MaxFiles: l.MaxFiles, MaxFileSizeMB: l.MaxFileSizeMB, + Enabled: l.Enabled, } } @@ -7258,11 +7264,13 @@ func DefaultLogConfig() *LogConfig { return &LogConfig{ MaxFiles: 10, MaxFileSizeMB: 10, + Enabled: true, } } -// Validate returns an error if the log config specified are less than -// the minimum allowed. +// Validate returns an error if the log config specified are less than the +// minimum allowed. Note that because we have a non-zero default MaxFiles and +// MaxFileSizeMB, we can't validate that they're unset if Enabled=false func (l *LogConfig) Validate() error { var mErr multierror.Error if l.MaxFiles < 1 { diff --git a/scheduler/annotate.go b/scheduler/annotate.go index f6bea34eb..2e3cfb77d 100644 --- a/scheduler/annotate.go +++ b/scheduler/annotate.go @@ -188,7 +188,17 @@ FieldsLoop: ObjectsLoop: for _, oDiff := range diff.Objects { switch oDiff.Name { - case "LogConfig", "Service", "Constraint": + case "Service", "Constraint": + continue + case "LogConfig": + for _, fDiff := range oDiff.Fields { + switch fDiff.Name { + // force a destructive update if logger was enabled or disabled + case "Enabled": + destructive = true + break ObjectsLoop + } + } continue default: destructive = true diff --git a/scheduler/annotate_test.go b/scheduler/annotate_test.go index dd680f7a4..9c65e0232 100644 --- a/scheduler/annotate_test.go +++ b/scheduler/annotate_test.go @@ -336,6 +336,27 @@ func TestAnnotateTask(t *testing.T) { Parent: &structs.TaskGroupDiff{Type: structs.DiffTypeEdited}, Desired: AnnotationForcesInplaceUpdate, }, + { + Diff: &structs.TaskDiff{ + Type: structs.DiffTypeEdited, + Objects: []*structs.ObjectDiff{ + { + Type: structs.DiffTypeAdded, + Name: "LogConfig", + Fields: []*structs.FieldDiff{ + { + Type: structs.DiffTypeAdded, + Name: "Enabled", + Old: "true", + New: "false", + }, + }, + }, + }, + }, + Parent: &structs.TaskGroupDiff{Type: structs.DiffTypeEdited}, + Desired: AnnotationForcesDestructiveUpdate, + }, { Diff: &structs.TaskDiff{ Type: structs.DiffTypeEdited, diff --git a/scheduler/util.go b/scheduler/util.go index 2fc32aa84..ec7c90a77 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -304,6 +304,13 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison { if !at.Identity.Equal(bt.Identity) { return difference("task identity", at.Identity, bt.Identity) } + + // Most LogConfig updates are in-place but if we change Enabled we need + // to recreate the task to stop/start log collection and change the + // stdout/stderr of the task + if at.LogConfig.Enabled != bt.LogConfig.Enabled { + return difference("task log enabled", at.LogConfig.Enabled, bt.LogConfig.Enabled) + } } // none of the fields that trigger a destructive update were modified, diff --git a/website/content/api-docs/client.mdx b/website/content/api-docs/client.mdx index 3f5c53222..269c59a4e 100644 --- a/website/content/api-docs/client.mdx +++ b/website/content/api-docs/client.mdx @@ -603,7 +603,8 @@ fields: ## Stream Logs -This endpoint streams a task's stderr/stdout logs. +This endpoint streams a task's stderr/stdout logs. Note that if logging is set +to [enabled=false][] for the task, this endpoint will return a 404 error. | Method | Path | Produces | | ------ | ------------------------------ | ------------ | @@ -832,3 +833,4 @@ $ nomad operator api /v1/client/gc ``` [api-node-read]: /nomad/api-docs/nodes +[enabled=false]: /nomad/docs/job-specification/logs#enabled diff --git a/website/content/api-docs/jobs.mdx b/website/content/api-docs/jobs.mdx index cccb80ab9..c01ad9c66 100644 --- a/website/content/api-docs/jobs.mdx +++ b/website/content/api-docs/jobs.mdx @@ -491,6 +491,7 @@ $ curl \ }, "KillTimeout": 5000000000, "LogConfig": { + "Enabled": true, "MaxFiles": 10, "MaxFileSizeMB": 10 }, @@ -762,6 +763,7 @@ $ curl \ "Leader": false, "Lifecycle": null, "LogConfig": { + "Enabled": true, "MaxFileSizeMB": 10, "MaxFiles": 10 }, @@ -979,6 +981,7 @@ $ curl \ "Leader": false, "Lifecycle": null, "LogConfig": { + "Enabled": true, "MaxFileSizeMB": 10, "MaxFiles": 10 }, @@ -1145,6 +1148,7 @@ $ curl \ "Leader": false, "Lifecycle": null, "LogConfig": { + "Enabled": true, "MaxFileSizeMB": 10, "MaxFiles": 10 }, diff --git a/website/content/api-docs/json-jobs.mdx b/website/content/api-docs/json-jobs.mdx index 36687139f..06a7efb3d 100644 --- a/website/content/api-docs/json-jobs.mdx +++ b/website/content/api-docs/json-jobs.mdx @@ -927,6 +927,8 @@ The `Affinity` object supports the following keys: The `LogConfig` object configures the log rotation policy for a task's `stdout` and `stderr`. The `LogConfig` object supports the following attributes: +- `Enabled` - Enables log collection. + - `MaxFiles` - The maximum number of rotated files Nomad will retain for `stdout` and `stderr`, each tracked individually. @@ -940,6 +942,7 @@ a validation error when a job is submitted. ```json { "LogConfig": { + "Enabled": true, "MaxFiles": 3, "MaxFileSizeMB": 10 } diff --git a/website/content/docs/job-specification/logs.mdx b/website/content/docs/job-specification/logs.mdx index f4424c723..6ab62107d 100644 --- a/website/content/docs/job-specification/logs.mdx +++ b/website/content/docs/job-specification/logs.mdx @@ -3,7 +3,7 @@ layout: docs page_title: logs Block - Job Specification description: |- The "logs" block configures the log rotation policy for a task's stdout and - stderr. Logging is enabled by default with sane defaults. The "logs" block + stderr. Logging is enabled by default with reasonable defaults. The "logs" block allows for finer-grained control over how Nomad handles log files. --- @@ -12,10 +12,9 @@ description: |- The `logs` block configures the log rotation policy for a task's `stdout` and -`stderr`. Logging is enabled by default with sane defaults (provided in the -parameters section below), and there is currently no way to disable logging for -tasks. The `logs` block allows for finer-grained control over how Nomad handles -log files. +`stderr`. Logging is enabled by default with reasonable defaults (provided in +the parameters section below). The `logs` block allows for finer-grained control +over how Nomad handles log files. Nomad's log rotation works by writing stdout/stderr output from tasks to a file inside the `alloc/logs/` directory with the following format: @@ -32,6 +31,7 @@ job "docs" { logs { max_files = 10 max_file_size = 10 + enabled = true } } } @@ -52,6 +52,17 @@ please see the [`nomad alloc logs`][logs-command] command. the total amount of disk space needed to retain the rotated set of files, Nomad will return a validation error when a job is submitted. +- `enabled` `(bool: true)` - Specifies that log collection should be enabled for + this task. If set to `false`, the task driver will attach stdout/stderr of the + task to `/dev/null` (or `NUL` on Windows). You should only disable log + collection if your application has some other way of emitting logs, such as + writing to a remote syslog server. Note that the `nomad alloc logs` command + and related APIs will return errors (404 "not found") if logging is disabled. + + Some task drivers such as `docker` support a [`disable_log_collection`][] + option. If the task driver's `disable_log_collection` option is set to `true`, + it will override `enabled=true` in the task's `logs` block. + ## `logs` Examples The following examples only show the `logs` blocks. Remember that the @@ -60,7 +71,7 @@ The following examples only show the `logs` blocks. Remember that the ### Configure Defaults This example shows a default logging configuration. Yes, it is empty on purpose. -Nomad automatically enables logging with sane defaults as described in the +Nomad automatically enables logging with reasonable defaults as described in the parameters section above. ```hcl @@ -81,3 +92,4 @@ logs { ``` [logs-command]: /nomad/docs/commands/alloc/logs 'Nomad logs command' +[`disable_log_collection`]: /nomad/docs/drivers/docker#disable_log_collection