logs: fix missing allocation logs after update to Nomad 1.5.4 (#17087)

When the server restarts for the upgrade, it loads the `structs.Job` from the
Raft snapshot/logs. The jobspec has long since been parsed, so none of the
guards around the default value are in play. The empty field value for `Enabled`
is the zero value, which is false.

This doesn't impact any running allocation because we don't replace running
allocations when either the client or server restart. But as soon as any
allocation gets rescheduled (ex. you drain all your clients during upgrades),
it'll be using the `structs.Job` that the server has, which has `Enabled =
false`, and logs will not be collected.

This changeset fixes the bug by adding a new field `Disabled` which defaults to
false (so that the zero value works), and deprecates the old field.

Fixes #17076
This commit is contained in:
Tim Gross 2023-05-04 16:01:18 -04:00 committed by GitHub
parent b4c9f3bbc2
commit 17bd930ca9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 108 additions and 72 deletions

3
.changelog/17087.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
logging: Fixed a bug where alloc logs would not be collected after an upgrade to 1.5.4
```

View File

@ -639,16 +639,21 @@ func (g *TaskGroup) AddSpread(s *Spread) *TaskGroup {
// LogConfig provides configuration for log rotation
type LogConfig struct {
MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"`
MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"`
Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"`
MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"`
MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"`
// COMPAT(1.6.0): Enabled had to be swapped for Disabled to fix a backwards
// compatibility bug when restoring pre-1.5.4 jobs. Remove in 1.6.0
Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"`
Disabled *bool `mapstructure:"disabled" hcl:"disabled,optional"`
}
func DefaultLogConfig() *LogConfig {
return &LogConfig{
MaxFiles: pointerOf(10),
MaxFileSizeMB: pointerOf(10),
Enabled: pointerOf(true),
Disabled: pointerOf(false),
}
}
@ -659,8 +664,8 @@ func (l *LogConfig) Canonicalize() {
if l.MaxFileSizeMB == nil {
l.MaxFileSizeMB = pointerOf(10)
}
if l.Enabled == nil {
l.Enabled = pointerOf(true)
if l.Disabled == nil {
l.Disabled = pointerOf(false)
}
}

View File

@ -46,7 +46,7 @@ type logmonHook struct {
type logmonHookConfig struct {
logDir string
enabled bool
disabled bool
stdoutFifo string
stderrFifo string
}
@ -63,12 +63,12 @@ func newLogMonHook(tr *TaskRunner, logger hclog.Logger) *logmonHook {
func newLogMonHookConfig(taskName string, logCfg *structs.LogConfig, logDir string) *logmonHookConfig {
cfg := &logmonHookConfig{
logDir: logDir,
enabled: logCfg.Enabled,
logDir: logDir,
disabled: logCfg.Disabled,
}
// If logging is disabled configure task's stdout/err to point to devnull
if !logCfg.Enabled {
if logCfg.Disabled {
cfg.stdoutFifo = os.DevNull
cfg.stderrFifo = os.DevNull
return cfg
@ -116,7 +116,7 @@ func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig,
func (h *logmonHook) Prestart(ctx context.Context,
req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
if !h.isLoggingEnabled() {
if h.isLoggingDisabled() {
return nil
}
@ -151,10 +151,10 @@ func (h *logmonHook) Prestart(ctx context.Context,
}
}
func (h *logmonHook) isLoggingEnabled() bool {
if !h.config.enabled {
func (h *logmonHook) isLoggingDisabled() bool {
if h.config.disabled {
h.logger.Debug("log collection is disabled by task")
return false
return true
}
// internal plugins have access to a capability to disable logging and
@ -162,16 +162,16 @@ func (h *logmonHook) isLoggingEnabled() bool {
// currently only the docker driver exposes this to users.
ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver)
if !ok {
return true
return false
}
caps := ic.InternalCapabilities()
if caps.DisableLogCollection {
h.logger.Debug("log collection is disabled by driver")
return false
return true
}
return true
return false
}
func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error {
@ -219,7 +219,7 @@ func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPr
}
func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
if !h.isLoggingEnabled() {
if h.isLoggingDisabled() {
return nil
}

View File

@ -112,7 +112,7 @@ func TestTaskRunner_LogmonHook_Disabled(t *testing.T) {
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.LogConfig.Enabled = false
task.LogConfig.Disabled = true
dir := t.TempDir()

View File

@ -1242,11 +1242,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,
structsTask.Resources = ApiResourcesToStructs(apiTask.Resources)
structsTask.LogConfig = &structs.LogConfig{
MaxFiles: *apiTask.LogConfig.MaxFiles,
MaxFileSizeMB: *apiTask.LogConfig.MaxFileSizeMB,
Enabled: *apiTask.LogConfig.Enabled,
}
structsTask.LogConfig = apiLogConfigToStructs(apiTask.LogConfig)
if len(apiTask.Artifacts) > 0 {
structsTask.Artifacts = []*structs.TaskArtifact{}
@ -1809,13 +1805,21 @@ func apiLogConfigToStructs(in *api.LogConfig) *structs.LogConfig {
if in == nil {
return nil
}
return &structs.LogConfig{
Enabled: *in.Enabled,
Disabled: dereferenceBool(in.Disabled),
MaxFiles: dereferenceInt(in.MaxFiles),
MaxFileSizeMB: dereferenceInt(in.MaxFileSizeMB),
}
}
func dereferenceBool(in *bool) bool {
if in == nil {
return false
}
return *in
}
func dereferenceInt(in *int) int {
if in == nil {
return 0

View File

@ -2767,7 +2767,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: pointer.Of(10 * time.Second),
KillSignal: "SIGQUIT",
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(10),
MaxFileSizeMB: pointer.Of(100),
},
@ -3185,7 +3185,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: 10 * time.Second,
KillSignal: "SIGQUIT",
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 10,
MaxFileSizeMB: 100,
},
@ -3342,7 +3342,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: pointer.Of(10 * time.Second),
KillSignal: "SIGQUIT",
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(10),
MaxFileSizeMB: pointer.Of(100),
},
@ -3468,7 +3468,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: 10 * time.Second,
KillSignal: "SIGQUIT",
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 10,
MaxFileSizeMB: 100,
},
@ -3639,16 +3639,39 @@ func TestConversion_dereferenceInt(t *testing.T) {
func TestConversion_apiLogConfigToStructs(t *testing.T) {
ci.Parallel(t)
require.Nil(t, apiLogConfigToStructs(nil))
require.Equal(t, &structs.LogConfig{
Enabled: true,
must.Nil(t, apiLogConfigToStructs(nil))
must.Eq(t, &structs.LogConfig{
Disabled: true,
MaxFiles: 2,
MaxFileSizeMB: 8,
}, apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(2),
MaxFileSizeMB: pointer.Of(8),
}))
// COMPAT(1.6.0): verify backwards compatibility fixes
// Note: we're intentionally ignoring the Enabled: false case
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(false),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(true),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Disabled: pointer.Of(false),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(false),
Disabled: pointer.Of(false),
}))
}
func TestConversion_apiResourcesToStructs(t *testing.T) {
@ -3743,7 +3766,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) {
Meta: meta,
KillTimeout: &timeout,
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 2,
MaxFileSizeMB: 8,
},
@ -3762,7 +3785,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) {
Meta: meta,
KillTimeout: &timeout,
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(2),
MaxFileSizeMB: pointer.Of(8),
},

View File

@ -262,7 +262,8 @@ func parseTask(item *ast.ObjectItem, keys []string) (*api.Task, error) {
valid := []string{
"max_files",
"max_file_size",
"enabled",
"enabled", // COMPAT(1.6.0): remove in favor of disabled
"disabled",
}
if err := checkHCLKeys(logsBlock.Val, valid); err != nil {
return nil, multierror.Prefix(err, "logs ->")

View File

@ -350,7 +350,7 @@ func TestParse(t *testing.T) {
LogConfig: &api.LogConfig{
MaxFiles: intToPtr(14),
MaxFileSizeMB: intToPtr(101),
Enabled: boolToPtr(true),
Disabled: boolToPtr(false),
},
Artifacts: []*api.TaskArtifact{
{

View File

@ -194,7 +194,7 @@ job "binstore-storagelocker" {
}
logs {
enabled = true
disabled = false
max_files = 14
max_file_size = 101
}

View File

@ -4420,7 +4420,7 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: true,
},
},
Expected: &TaskDiff{
@ -4432,9 +4432,9 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeAdded,
Name: "Enabled",
Name: "Disabled",
Old: "",
New: "false",
New: "true",
},
{
Type: DiffTypeAdded,
@ -4459,7 +4459,7 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: true,
},
},
New: &Task{},
@ -4472,8 +4472,8 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeDeleted,
Name: "Enabled",
Old: "false",
Name: "Disabled",
Old: "true",
New: "",
},
{
@ -4499,14 +4499,14 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: false,
},
},
New: &Task{
LogConfig: &LogConfig{
MaxFiles: 2,
MaxFileSizeMB: 20,
Enabled: true,
Disabled: true,
},
},
Expected: &TaskDiff{
@ -4518,7 +4518,7 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeEdited,
Name: "Enabled",
Name: "Disabled",
Old: "false",
New: "true",
},
@ -4546,14 +4546,14 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: false,
},
},
New: &Task{
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 20,
Enabled: true,
Disabled: true,
},
},
Expected: &TaskDiff{
@ -4565,7 +4565,7 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeEdited,
Name: "Enabled",
Name: "Disabled",
Old: "false",
New: "true",
},

View File

@ -7225,7 +7225,7 @@ const (
type LogConfig struct {
MaxFiles int
MaxFileSizeMB int
Enabled bool
Disabled bool
}
func (l *LogConfig) Equal(o *LogConfig) bool {
@ -7241,7 +7241,7 @@ func (l *LogConfig) Equal(o *LogConfig) bool {
return false
}
if l.Enabled != o.Enabled {
if l.Disabled != o.Disabled {
return false
}
@ -7255,7 +7255,7 @@ func (l *LogConfig) Copy() *LogConfig {
return &LogConfig{
MaxFiles: l.MaxFiles,
MaxFileSizeMB: l.MaxFileSizeMB,
Enabled: l.Enabled,
Disabled: l.Disabled,
}
}
@ -7264,13 +7264,13 @@ func DefaultLogConfig() *LogConfig {
return &LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
Enabled: true,
Disabled: false,
}
}
// Validate returns an error if the log config specified are less than the
// minimum allowed. Note that because we have a non-zero default MaxFiles and
// MaxFileSizeMB, we can't validate that they're unset if Enabled=false
// MaxFileSizeMB, we can't validate that they're unset if Disabled=true
func (l *LogConfig) Validate() error {
var mErr multierror.Error
if l.MaxFiles < 1 {

View File

@ -194,7 +194,7 @@ FieldsLoop:
for _, fDiff := range oDiff.Fields {
switch fDiff.Name {
// force a destructive update if logger was enabled or disabled
case "Enabled":
case "Disabled":
destructive = true
break ObjectsLoop
}

View File

@ -346,7 +346,7 @@ func TestAnnotateTask(t *testing.T) {
Fields: []*structs.FieldDiff{
{
Type: structs.DiffTypeAdded,
Name: "Enabled",
Name: "Disabled",
Old: "true",
New: "false",
},

View File

@ -305,11 +305,11 @@ func tasksUpdated(jobA, jobB *structs.Job, taskGroup string) comparison {
return difference("task identity", at.Identity, bt.Identity)
}
// Most LogConfig updates are in-place but if we change Enabled we need
// Most LogConfig updates are in-place but if we change Disabled we need
// to recreate the task to stop/start log collection and change the
// stdout/stderr of the task
if at.LogConfig.Enabled != bt.LogConfig.Enabled {
return difference("task log enabled", at.LogConfig.Enabled, bt.LogConfig.Enabled)
if at.LogConfig.Disabled != bt.LogConfig.Disabled {
return difference("task log disabled", at.LogConfig.Disabled, bt.LogConfig.Disabled)
}
}

View File

@ -604,7 +604,7 @@ fields:
## Stream Logs
This endpoint streams a task's stderr/stdout logs. Note that if logging is set
to [enabled=false][] for the task, this endpoint will return a 404 error.
to [disabled=true][] for the task, this endpoint will return a 404 error.
| Method | Path | Produces |
| ------ | ------------------------------ | ------------ |
@ -833,4 +833,4 @@ $ nomad operator api /v1/client/gc
```
[api-node-read]: /nomad/api-docs/nodes
[enabled=false]: /nomad/docs/job-specification/logs#enabled
[disabled=true]: /nomad/docs/job-specification/logs#disabled

View File

@ -491,7 +491,7 @@ $ curl \
},
"KillTimeout": 5000000000,
"LogConfig": {
"Enabled": true,
"Disabled": false,
"MaxFiles": 10,
"MaxFileSizeMB": 10
},
@ -763,7 +763,7 @@ $ curl \
"Leader": false,
"Lifecycle": null,
"LogConfig": {
"Enabled": true,
"Disabled": false,
"MaxFileSizeMB": 10,
"MaxFiles": 10
},
@ -981,7 +981,7 @@ $ curl \
"Leader": false,
"Lifecycle": null,
"LogConfig": {
"Enabled": true,
"Disabled": false,
"MaxFileSizeMB": 10,
"MaxFiles": 10
},
@ -1148,7 +1148,7 @@ $ curl \
"Leader": false,
"Lifecycle": null,
"LogConfig": {
"Enabled": true,
"Disabled": false,
"MaxFileSizeMB": 10,
"MaxFiles": 10
},

View File

@ -927,7 +927,7 @@ The `Affinity` object supports the following keys:
The `LogConfig` object configures the log rotation policy for a task's `stdout` and
`stderr`. The `LogConfig` object supports the following attributes:
- `Enabled` - Enables log collection.
- `Disabled` - Disables log collection.
- `MaxFiles` - The maximum number of rotated files Nomad will retain for
`stdout` and `stderr`, each tracked individually.
@ -942,7 +942,7 @@ a validation error when a job is submitted.
```json
{
"LogConfig": {
"Enabled": true,
"Disabled": false,
"MaxFiles": 3,
"MaxFileSizeMB": 10
}

View File

@ -31,7 +31,7 @@ job "docs" {
logs {
max_files = 10
max_file_size = 10
enabled = true
disabled = false
}
}
}
@ -52,8 +52,8 @@ please see the [`nomad alloc logs`][logs-command] command.
the total amount of disk space needed to retain the rotated set of files,
Nomad will return a validation error when a job is submitted.
- `enabled` `(bool: true)` - Specifies that log collection should be enabled for
this task. If set to `false`, the task driver will attach stdout/stderr of the
- `disabled` `(bool: false)` - Specifies that log collection should be enabled for
this task. If set to `true`, the task driver will attach stdout/stderr of the
task to `/dev/null` (or `NUL` on Windows). You should only disable log
collection if your application has some other way of emitting logs, such as
writing to a remote syslog server. Note that the `nomad alloc logs` command
@ -61,7 +61,7 @@ please see the [`nomad alloc logs`][logs-command] command.
Some task drivers such as `docker` support a [`disable_log_collection`][]
option. If the task driver's `disable_log_collection` option is set to `true`,
it will override `enabled=true` in the task's `logs` block.
it will override `disabled=false` in the task's `logs` block.
## `logs` Examples