From b843b1a853256b8a9f87479b382d04e29aef1706 Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Mon, 21 Dec 2015 06:06:45 +0000 Subject: [PATCH 01/45] Support CPU and meory isolators for the rkt driver The rkt community added supprt for these isolators recently --- client/driver/rkt.go | 13 +++++++++++++ client/driver/rkt_test.go | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 1d90ba71e..403cfb537 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -149,6 +149,19 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e cmd_args = append(cmd_args, fmt.Sprintf("--exec=%v", exec_cmd)) } + if task.Resources.MemoryMB == 0 { + return nil, fmt.Errorf("Memory limit cannot be zero") + } + if task.Resources.CPU == 0 { + return nil, fmt.Errorf("CPU limit cannot be zero") + } + + // Add memory isolator + cmd_args = append(cmd_args, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB) * 1024 * 1024)) + + // Add CPU isolator + cmd_args = append(cmd_args, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) + // Add user passed arguments. if len(driverConfig.Args) != 0 { parsed := args.ParseAndReplace(driverConfig.Args, envVars.Map()) diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go index 15db27b27..d63ca2d91 100644 --- a/client/driver/rkt_test.go +++ b/client/driver/rkt_test.go @@ -81,6 +81,10 @@ func TestRktDriver_Start(t *testing.T) { "image": "coreos.com/etcd:v2.0.4", "command": "/etcd", }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) @@ -121,6 +125,10 @@ func TestRktDriver_Start_Wait(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) @@ -162,6 +170,10 @@ func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) @@ -204,6 +216,10 @@ func TestRktDriver_Start_Wait_Logs(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) From 3d2589e797dc0434a33e5263600c80b9ccde3921 Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Mon, 21 Dec 2015 06:09:11 +0000 Subject: [PATCH 02/45] Run gofmt --- client/driver/rkt.go | 12 ++++++------ client/driver/rkt_test.go | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 403cfb537..b205a480f 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -150,14 +150,14 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e } if task.Resources.MemoryMB == 0 { - return nil, fmt.Errorf("Memory limit cannot be zero") - } - if task.Resources.CPU == 0 { - return nil, fmt.Errorf("CPU limit cannot be zero") - } + return nil, fmt.Errorf("Memory limit cannot be zero") + } + if task.Resources.CPU == 0 { + return nil, fmt.Errorf("CPU limit cannot be zero") + } // Add memory isolator - cmd_args = append(cmd_args, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB) * 1024 * 1024)) + cmd_args = append(cmd_args, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*1024*1024)) // Add CPU isolator cmd_args = append(cmd_args, 
fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go index d63ca2d91..8ee4425c2 100644 --- a/client/driver/rkt_test.go +++ b/client/driver/rkt_test.go @@ -125,10 +125,10 @@ func TestRktDriver_Start_Wait(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, - Resources: &structs.Resources{ - MemoryMB: 256, - CPU: 512, - }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) @@ -170,10 +170,10 @@ func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, - Resources: &structs.Resources{ - MemoryMB: 256, - CPU: 512, - }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) @@ -216,10 +216,10 @@ func TestRktDriver_Start_Wait_Logs(t *testing.T) { "command": "/etcd", "args": []string{"--version"}, }, - Resources: &structs.Resources{ - MemoryMB: 256, - CPU: 512, - }, + Resources: &structs.Resources{ + MemoryMB: 256, + CPU: 512, + }, } driverCtx := testDriverContext(task.Name) From 1c4e3808689912689563f988eeb586c4cf93df7f Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Mon, 21 Dec 2015 10:10:37 +0000 Subject: [PATCH 03/45] Use camelCase for variable names --- client/driver/rkt.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index b205a480f..c5e1f76da 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -109,21 +109,21 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e taskLocal := filepath.Join(taskDir, allocdir.TaskLocal) // Add the given trust prefix - trust_prefix, trust_cmd := task.Config["trust_prefix"] - if trust_cmd { + trustPrefix, trustCmd := task.Config["trust_prefix"] + if trustCmd { var outBuf, errBuf bytes.Buffer - cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trust_prefix)) + cmd := exec.Command("rkt", "trust", fmt.Sprintf("--prefix=%s", trustPrefix)) cmd.Stdout = &outBuf cmd.Stderr = &errBuf if err := cmd.Run(); err != nil { return nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s", err, outBuf.String(), errBuf.String()) } - d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trust_prefix) + d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trustPrefix) } // Build the command. - var cmd_args []string + var cmdArgs []string // Inject the environment variables. envVars := TaskEnvironmentVariables(ctx, task) @@ -133,20 +133,20 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e envVars.ClearAllocDir() for k, v := range envVars.Map() { - cmd_args = append(cmd_args, fmt.Sprintf("--set-env=%v=%v", k, v)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--set-env=%v=%v", k, v)) } // Disble signature verification if the trust command was not run. - if !trust_cmd { - cmd_args = append(cmd_args, "--insecure-skip-verify") + if !trustCmd { + cmdArgs = append(cmdArgs, "--insecure-skip-verify") } // Append the run command. - cmd_args = append(cmd_args, "run", "--mds-register=false", img) + cmdArgs = append(cmdArgs, "run", "--mds-register=false", img) // Check if the user has overriden the exec command. 
- if exec_cmd, ok := task.Config["command"]; ok { - cmd_args = append(cmd_args, fmt.Sprintf("--exec=%v", exec_cmd)) + if execCmd, ok := task.Config["command"]; ok { + cmdArgs = append(cmdArgs, fmt.Sprintf("--exec=%v", execCmd)) } if task.Resources.MemoryMB == 0 { @@ -157,10 +157,10 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e } // Add memory isolator - cmd_args = append(cmd_args, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*1024*1024)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*1024*1024)) // Add CPU isolator - cmd_args = append(cmd_args, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) + cmdArgs = append(cmdArgs, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) // Add user passed arguments. if len(driverConfig.Args) != 0 { @@ -168,11 +168,11 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e // Need to start arguments with "--" if len(parsed) > 0 { - cmd_args = append(cmd_args, "--") + cmdArgs = append(cmdArgs, "--") } for _, arg := range parsed { - cmd_args = append(cmd_args, fmt.Sprintf("%v", arg)) + cmdArgs = append(cmdArgs, fmt.Sprintf("%v", arg)) } } @@ -190,7 +190,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e return nil, fmt.Errorf("Error opening file to redirect stderr: %v", err) } - cmd := exec.Command("rkt", cmd_args...) + cmd := exec.Command("rkt", cmdArgs...) cmd.Stdout = stdo cmd.Stderr = stde From 0918a382214618bd669712308f118d7d1d722afb Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Mon, 21 Dec 2015 17:48:21 +0000 Subject: [PATCH 04/45] Do not allow rkt version less than 0.14.0 --- client/driver/rkt.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index c5e1f76da..871b20bda 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -14,6 +14,7 @@ import ( "syscall" "time" + "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" cstructs "github.com/hashicorp/nomad/client/driver/structs" @@ -85,6 +86,13 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e node.Attributes["driver.rkt.version"] = rktMatches[1] node.Attributes["driver.rkt.appc.version"] = appcMatches[1] + minVersion, _ := version.NewVersion("0.14.0") + currentVersion, _ := version.NewVersion(node.Attributes["driver.rkt.version"]) + if currentVersion.LessThan(minVersion) { + // Do not allow rkt < 0.14.0 + d.logger.Printf("[WARN] driver.rkt: please upgrade rkt to a version >= %s", minVersion) + node.Attributes["driver.rkt"] = "0" + } return true, nil } From 74a52927894b1279df23adcc7314d4b6fb008c4e Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Tue, 22 Dec 2015 05:15:37 +0000 Subject: [PATCH 05/45] Move constants to the top --- client/driver/rkt.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 871b20bda..90e33cbc4 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -29,6 +29,13 @@ var ( reAppcVersion = regexp.MustCompile(`appc version (\d[.\d]+)`) ) +const ( + // rkt added support for CPU and memory isolators in 0.14.0. 
We cannot support + // an earlier version to maintain an uniform interface across all drivers + minRktVersion = "0.14.0" + conversionFactor = 1024 * 1024 +) + // RktDriver is a driver for running images via Rkt // We attempt to chose sane defaults for now, with more configuration available // planned in the future @@ -86,7 +93,7 @@ func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, e node.Attributes["driver.rkt.version"] = rktMatches[1] node.Attributes["driver.rkt.appc.version"] = appcMatches[1] - minVersion, _ := version.NewVersion("0.14.0") + minVersion, _ := version.NewVersion(minRktVersion) currentVersion, _ := version.NewVersion(node.Attributes["driver.rkt.version"]) if currentVersion.LessThan(minVersion) { // Do not allow rkt < 0.14.0 @@ -165,7 +172,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e } // Add memory isolator - cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*1024*1024)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*conversionFactor)) // Add CPU isolator cmdArgs = append(cmdArgs, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) From cd51ee6430bed243512ae58e644142460795d9be Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Mon, 21 Dec 2015 16:55:33 +0000 Subject: [PATCH 06/45] Handle non 200 codes while getting env metadata --- client/fingerprint/env_aws.go | 4 ++++ client/fingerprint/env_gce.go | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 1963f6c50..3a55aff50 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -116,6 +116,10 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) } for _, k := range keys { res, err := client.Get(metadataURL + k) + if res.StatusCode != http.StatusOK { + f.logger.Printf("[WARN]: fingerprint.env_aws: Could not read value for attribute %q", k) + continue + } if err != nil { // if it's a URL error, assume we're not in an AWS environment // TODO: better way to detect AWS? Check xen virtualization? diff --git a/client/fingerprint/env_gce.go b/client/fingerprint/env_gce.go index 357148c5f..0353dbdb1 100644 --- a/client/fingerprint/env_gce.go +++ b/client/fingerprint/env_gce.go @@ -94,7 +94,8 @@ func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error } res, err := f.client.Do(req) - if err != nil { + if err != nil || res.StatusCode != http.StatusOK { + f.logger.Printf("[WARN]: fingerprint.env_gce: Could not read value for attribute %q", attribute) return "", err } From 29411d698ab7011a9baf359e99825a7b6de58879 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Dec 2015 10:11:22 -0800 Subject: [PATCH 07/45] Rkt comment and variable name update --- client/driver/rkt.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 90e33cbc4..5da864bfa 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -30,10 +30,13 @@ var ( ) const ( - // rkt added support for CPU and memory isolators in 0.14.0. We cannot support - // an earlier version to maintain an uniform interface across all drivers - minRktVersion = "0.14.0" - conversionFactor = 1024 * 1024 + // minRktVersion is the earliest supported version of rkt. rkt added support + // for CPU and memory isolators in 0.14.0. 
We cannot support an earlier + // version to maintain an uniform interface across all drivers + minRktVersion = "0.14.0" + + // bytesToMB is the conversion from bytes to megabytes. + bytesToMB = 1024 * 1024 ) // RktDriver is a driver for running images via Rkt @@ -172,7 +175,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e } // Add memory isolator - cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*conversionFactor)) + cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)*bytesToMB)) // Add CPU isolator cmdArgs = append(cmdArgs, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU))) From 305fe776027b1e0e752bc91c013ecc63d244a05c Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Dec 2015 10:20:07 -0800 Subject: [PATCH 08/45] Changelog and version --- CHANGELOG.md | 12 ++++++++++++ version.go | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5c689af3..990f48eb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## 0.3.0 (UNRELEASED) + +IMPROVEMENTS: + * core: Improved restart policy with more user configuration [GH-594] + * core: Batch jobs are garbage collected from the Nomad Servers [GH-586] + * driver/rkt: Add support for CPU/Memory isolation [GH-610] + * cli: Output of agent-info is sorted [GH-617] + +BUG FIXES: + * cli: Handle parsing of un-named ports [GH-604] + * client: Handle non-200 codes when parsing AWS metadata [GH-614] + ## 0.2.3 (December 17, 2015) BUG FIXES: diff --git a/version.go b/version.go index f7908dc06..c2fba56b0 100644 --- a/version.go +++ b/version.go @@ -5,9 +5,9 @@ var GitCommit string var GitDescribe string // The main version number that is being run at the moment. -const Version = "0.2.3" +const Version = "0.3.0" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. -const VersionPrerelease = "" +const VersionPrerelease = "dev" From a347cda6e3c22bb56e53a1b5b338757380f408cb Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Tue, 22 Dec 2015 18:23:29 +0000 Subject: [PATCH 09/45] Consolidate if else conditions --- client/driver/rkt.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 5da864bfa..99084b5f8 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -126,6 +126,9 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e } taskLocal := filepath.Join(taskDir, allocdir.TaskLocal) + // Build the command. + var cmdArgs []string + // Add the given trust prefix trustPrefix, trustCmd := task.Config["trust_prefix"] if trustCmd { @@ -138,11 +141,11 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e err, outBuf.String(), errBuf.String()) } d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trustPrefix) + } else { + // Disble signature verification if the trust command was not run. + cmdArgs = append(cmdArgs, "--insecure-skip-verify") } - // Build the command. - var cmdArgs []string - // Inject the environment variables. 
envVars := TaskEnvironmentVariables(ctx, task) @@ -154,11 +157,6 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e cmdArgs = append(cmdArgs, fmt.Sprintf("--set-env=%v=%v", k, v)) } - // Disble signature verification if the trust command was not run. - if !trustCmd { - cmdArgs = append(cmdArgs, "--insecure-skip-verify") - } - // Append the run command. cmdArgs = append(cmdArgs, "run", "--mds-register=false", img) From 97b0fb3c49dd8012e219057d49fc9a5a91ff1543 Mon Sep 17 00:00:00 2001 From: Abhishek Chanda Date: Wed, 23 Dec 2015 00:05:25 +0530 Subject: [PATCH 10/45] Update some rkt docs --- website/source/docs/drivers/rkt.html.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/website/source/docs/drivers/rkt.html.md b/website/source/docs/drivers/rkt.html.md index d37e16b5c..56d1069bc 100644 --- a/website/source/docs/drivers/rkt.html.md +++ b/website/source/docs/drivers/rkt.html.md @@ -49,9 +49,11 @@ The `rkt` driver will set the following client attributes: * `driver.rkt` - Set to `1` if rkt is found on the host node. Nomad determines this by executing `rkt version` on the host and parsing the output -* `driver.rkt.version` - Version of `rkt` eg: `0.8.1` +* `driver.rkt.version` - Version of `rkt` eg: `0.8.1`. Note that the minimum required +version is `0.14.0` * `driver.rkt.appc.version` - Version of `appc` that `rkt` is using eg: `0.8.1` ## Resource Isolation -This driver does not support any resource isolation as of now. +This driver supports CPU and memory isolation by delegating to `rkt`. Network isolation +is not supported as of now. From f3dbccdb488ae35578648d20f5bbd9d66d70a158 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Dec 2015 10:37:45 -0800 Subject: [PATCH 11/45] Update rkt docs --- website/source/docs/drivers/rkt.html.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/website/source/docs/drivers/rkt.html.md b/website/source/docs/drivers/rkt.html.md index 56d1069bc..5efab860b 100644 --- a/website/source/docs/drivers/rkt.html.md +++ b/website/source/docs/drivers/rkt.html.md @@ -11,10 +11,9 @@ description: |- Name: `rkt` The `rkt` driver provides an interface for using CoreOS rkt for running -application containers. Currently, the driver supports launching -containers but does not support resource isolation or dynamic ports. This can -lead to resource over commitment and port conflicts and as such, this driver is -being marked as experimental and should be used with care. +application containers. Currently, the driver supports launching containers but +does not support dynamic ports. This can lead to port conflicts and as such, +this driver is being marked as experimental and should be used with care. ## Task Configuration From 2d16cb86ce4a875dd851450f5df4c9b0be762b71 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Dec 2015 11:14:41 -0800 Subject: [PATCH 12/45] Enforce absolute paths for alloc/state/data directories --- command/agent/command.go | 17 +++++++++++++++++ website/source/docs/agent/config.html.md | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 6976b9d76..7c3d5ef34 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -193,6 +193,23 @@ func (c *Command) readConfig() *Config { return nil } + // Verify the paths are absolute. 
+ dirs := map[string]string{ + "data-dir": config.DataDir, + "alloc-dir": config.Client.AllocDir, + "state-dir": config.Client.StateDir, + } + for k, dir := range dirs { + if dir == "" { + continue + } + + if !filepath.IsAbs(dir) { + c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir)) + return nil + } + } + // Ensure that we have the directories we neet to run. if config.Server.Enabled && config.DataDir == "" { c.Ui.Error("Must specify data directory") diff --git a/website/source/docs/agent/config.html.md b/website/source/docs/agent/config.html.md index c35ac0a63..386ba64ed 100644 --- a/website/source/docs/agent/config.html.md +++ b/website/source/docs/agent/config.html.md @@ -94,7 +94,7 @@ nodes, unless otherwise specified: directory by default to store temporary allocation data as well as cluster information. Server nodes use this directory to store cluster state, including the replicated log and snapshot data. This option is required to start the - Nomad agent. + Nomad agent and must be specified as an absolute path. * `log_level`: Controls the verbosity of logs the Nomad agent will output. Valid log levels include `WARN`, `INFO`, or `DEBUG` in increasing order of @@ -253,14 +253,14 @@ configured on server nodes. configuration options depend on this value. Defaults to `false`. * `state_dir`: This is the state dir used to store client state. By default, it lives inside of the [data_dir](#data_dir), in - the "client" sub-path. + the "client" sub-path. It must be specified as an absolute path. * `alloc_dir`: A directory used to store allocation data. Depending on the workload, the size of this directory can grow arbitrarily large as it is used to store downloaded artifacts for drivers (QEMU images, JAR files, etc.). It is therefore important to ensure this directory is placed some place on the filesystem with adequate storage capacity. By default, this directory lives under the [data_dir](#data_dir) at the - "alloc" sub-path. + "alloc" sub-path. It must be specified as an absolute path. * `servers`: An array of server addresses. This list is used to register the client with the server nodes and advertise the available resources so that the agent can receive work. From 6eab43fb076818334b857f9aef7a16b4a42918e6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 22 Dec 2015 11:16:15 -0800 Subject: [PATCH 13/45] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 990f48eb2..c62ba4fb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ IMPROVEMENTS: BUG FIXES: * cli: Handle parsing of un-named ports [GH-604] * client: Handle non-200 codes when parsing AWS metadata [GH-614] + * cli: Enforce absolute paths for data directories [GH-622] ## 0.2.3 (December 17, 2015) From 8c484875ce9121a9c1d5ca036919f5d44e9be17a Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 1 Dec 2015 11:40:40 -0800 Subject: [PATCH 14/45] Job endpoint handles periodic jobs --- nomad/job_endpoint.go | 44 ++++++++++-- nomad/job_endpoint_test.go | 138 +++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 4 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 18da75268..e17cd78fb 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -49,6 +49,14 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis return err } + // Populate the reply with job information + reply.JobModifyIndex = index + + // If the job is periodic, we don't create an eval. 
+ if args.Job.IsPeriodic() { + return nil + } + // Create a new evaluation eval := &structs.Evaluation{ ID: structs.GenerateUUID(), @@ -73,10 +81,9 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis return err } - // Setup the reply + // Populate the reply with eval information reply.EvalID = eval.ID reply.EvalCreateIndex = evalIndex - reply.JobModifyIndex = index reply.Index = evalIndex return nil } @@ -116,6 +123,10 @@ func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegis return fmt.Errorf("job not found") } + if job.IsPeriodic() { + return fmt.Errorf("can't evaluate periodic job") + } + // Create a new evaluation eval := &structs.Evaluation{ ID: structs.GenerateUUID(), @@ -153,6 +164,24 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD } defer metrics.MeasureSince([]string{"nomad", "job", "deregister"}, time.Now()) + // Validate the arguments + if args.JobID == "" { + return fmt.Errorf("missing job ID for evaluation") + } + + // Lookup the job + snap, err := j.srv.fsm.State().Snapshot() + if err != nil { + return err + } + job, err := snap.JobByID(args.JobID) + if err != nil { + return err + } + if job == nil { + return fmt.Errorf("job not found") + } + // Commit this update via Raft _, index, err := j.srv.raftApply(structs.JobDeregisterRequestType, args) if err != nil { @@ -160,6 +189,14 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD return err } + // Populate the reply with job information + reply.JobModifyIndex = index + + // If the job is periodic, we don't create an eval. + if job.IsPeriodic() { + return nil + } + // Create a new evaluation // XXX: The job priority / type is strange for this, since it's not a high // priority even if the job was. The scheduler itself also doesn't matter, @@ -185,10 +222,9 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD return err } - // Setup the reply + // Populate the reply with eval information reply.EvalID = eval.ID reply.EvalCreateIndex = evalIndex - reply.JobModifyIndex = index reply.Index = evalIndex return nil } diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 5bc3bb952..d1dc0948a 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -233,6 +233,55 @@ func TestJobEndpoint_Register_GC_Set(t *testing.T) { } } +func TestJobEndpoint_Register_Periodic(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request for a periodic job. 
+ job := mock.Job() + job.Type = structs.JobTypeBatch + job.Periodic = &structs.PeriodicConfig{Enabled: true} + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + if resp.JobModifyIndex == 0 { + t.Fatalf("bad index: %d", resp.Index) + } + + // Check for the node in the FSM + state := s1.fsm.State() + out, err := state.JobByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if out == nil { + t.Fatalf("expected job") + } + if out.CreateIndex != resp.JobModifyIndex { + t.Fatalf("index mis-match") + } + serviceName := out.TaskGroups[0].Tasks[0].Services[0].Name + expectedServiceName := "web-frontend" + if serviceName != expectedServiceName { + t.Fatalf("Expected Service Name: %s, Actual: %s", expectedServiceName, serviceName) + } + + if resp.EvalID != "" { + t.Fatalf("Register created an eval for a periodic job") + } +} + func TestJobEndpoint_Evaluate(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue @@ -304,6 +353,44 @@ func TestJobEndpoint_Evaluate(t *testing.T) { } } +func TestJobEndpoint_Evaluate_Periodic(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + job := mock.Job() + job.Type = structs.JobTypeBatch + job.Periodic = &structs.PeriodicConfig{Enabled: true} + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + if resp.JobModifyIndex == 0 { + t.Fatalf("bad index: %d", resp.Index) + } + + // Force a re-evaluation + reEval := &structs.JobEvaluateRequest{ + JobID: job.ID, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + if err := msgpackrpc.CallWithCodec(codec, "Job.Evaluate", reEval, &resp); err == nil { + t.Fatal("expect an err") + } +} + func TestJobEndpoint_Deregister(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue @@ -380,6 +467,57 @@ func TestJobEndpoint_Deregister(t *testing.T) { } } +func TestJobEndpoint_Deregister_Periodic(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + job := mock.Job() + job.Type = structs.JobTypeBatch + job.Periodic = &structs.PeriodicConfig{Enabled: true} + reg := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Register", reg, &resp); err != nil { + t.Fatalf("err: %v", err) + } + + // Deregister + dereg := &structs.JobDeregisterRequest{ + JobID: job.ID, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp2 structs.JobDeregisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Deregister", dereg, &resp2); err != nil { + t.Fatalf("err: %v", err) + } + if resp2.JobModifyIndex == 0 { 
+ t.Fatalf("bad index: %d", resp2.Index) + } + + // Check for the node in the FSM + state := s1.fsm.State() + out, err := state.JobByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if out != nil { + t.Fatalf("unexpected job") + } + + if resp.EvalID != "" { + t.Fatalf("Deregister created an eval for a periodic job") + } +} + func TestJobEndpoint_GetJob(t *testing.T) { s1 := testServer(t, nil) defer s1.Shutdown() From a892d61ae7e4942b9df33a282e519440d6b2ce95 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 1 Dec 2015 14:54:57 -0800 Subject: [PATCH 15/45] FSM integration --- nomad/fsm.go | 38 +++++++++++++++++++++++++++----------- nomad/fsm_test.go | 31 +++++++++++++++++++++++-------- nomad/periodic.go | 27 +++++++++++++++++++++++++++ nomad/periodic_test.go | 38 ++++++++++++++++++++++++++++++++++++++ nomad/server.go | 5 ++++- 5 files changed, 119 insertions(+), 20 deletions(-) create mode 100644 nomad/periodic.go create mode 100644 nomad/periodic_test.go diff --git a/nomad/fsm.go b/nomad/fsm.go index 71c40d68f..ca81f0a77 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -38,11 +38,12 @@ const ( // along with Raft to provide strong consistency. We implement // this outside the Server to avoid exposing this outside the package. type nomadFSM struct { - evalBroker *EvalBroker - logOutput io.Writer - logger *log.Logger - state *state.StateStore - timetable *TimeTable + evalBroker *EvalBroker + periodicRunner PeriodicRunner + logOutput io.Writer + logger *log.Logger + state *state.StateStore + timetable *TimeTable } // nomadSnapshot is used to provide a snapshot of the current @@ -58,7 +59,7 @@ type snapshotHeader struct { } // NewFSMPath is used to construct a new FSM with a blank state -func NewFSM(evalBroker *EvalBroker, logOutput io.Writer) (*nomadFSM, error) { +func NewFSM(evalBroker *EvalBroker, periodic PeriodicRunner, logOutput io.Writer) (*nomadFSM, error) { // Create a state store state, err := state.NewStateStore(logOutput) if err != nil { @@ -66,11 +67,12 @@ func NewFSM(evalBroker *EvalBroker, logOutput io.Writer) (*nomadFSM, error) { } fsm := &nomadFSM{ - evalBroker: evalBroker, - logOutput: logOutput, - logger: log.New(logOutput, "", log.LstdFlags), - state: state, - timetable: NewTimeTable(timeTableGranularity, timeTableLimit), + evalBroker: evalBroker, + periodicRunner: periodic, + logOutput: logOutput, + logger: log.New(logOutput, "", log.LstdFlags), + state: state, + timetable: NewTimeTable(timeTableGranularity, timeTableLimit), } return fsm, nil } @@ -204,6 +206,14 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) return err } + + if req.Job.IsPeriodic() { + if err := n.periodicRunner.Add(req.Job); err != nil { + n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Add failed: %v", err) + return err + } + } + return nil } @@ -218,6 +228,12 @@ func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) return err } + + if err := n.periodicRunner.Remove(req.JobID); err != nil { + n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Remove failed: %v", err) + return err + } + return nil } diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 4ca75158b..eaba36917 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -43,7 +43,7 @@ func testStateStore(t *testing.T) *state.StateStore { } func testFSM(t *testing.T) *nomadFSM { - fsm, err := NewFSM(testBroker(t, 0), os.Stderr) + fsm, err := 
NewFSM(testBroker(t, 0), NewMockPeriodic(), os.Stderr) if err != nil { t.Fatalf("err: %v", err) } @@ -222,8 +222,11 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { func TestFSM_RegisterJob(t *testing.T) { fsm := testFSM(t) + job := mock.Job() + job.Type = structs.JobTypeBatch + job.Periodic = &structs.PeriodicConfig{Enabled: true} req := structs.JobRegisterRequest{ - Job: mock.Job(), + Job: job, } buf, err := structs.Encode(structs.JobRegisterRequestType, req) if err != nil { @@ -236,15 +239,20 @@ func TestFSM_RegisterJob(t *testing.T) { } // Verify we are registered - job, err := fsm.State().JobByID(req.Job.ID) + jobOut, err := fsm.State().JobByID(req.Job.ID) if err != nil { t.Fatalf("err: %v", err) } - if job == nil { + if jobOut == nil { t.Fatalf("not found!") } - if job.CreateIndex != 1 { - t.Fatalf("bad index: %d", job.CreateIndex) + if jobOut.CreateIndex != 1 { + t.Fatalf("bad index: %d", jobOut.CreateIndex) + } + + // Verify it was added to the periodic runner. + if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; !ok { + t.Fatal("job not added to periodic runner") } } @@ -252,6 +260,8 @@ func TestFSM_DeregisterJob(t *testing.T) { fsm := testFSM(t) job := mock.Job() + job.Type = structs.JobTypeBatch + job.Periodic = &structs.PeriodicConfig{Enabled: true} req := structs.JobRegisterRequest{ Job: job, } @@ -279,13 +289,18 @@ func TestFSM_DeregisterJob(t *testing.T) { } // Verify we are NOT registered - job, err = fsm.State().JobByID(req.Job.ID) + jobOut, err := fsm.State().JobByID(req.Job.ID) if err != nil { t.Fatalf("err: %v", err) } - if job != nil { + if jobOut != nil { t.Fatalf("job found!") } + + // Verify it was removed from the periodic runner. + if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; ok { + t.Fatal("job not removed from periodic runner") + } } func TestFSM_UpdateEval(t *testing.T) { diff --git a/nomad/periodic.go b/nomad/periodic.go new file mode 100644 index 000000000..f3d253ac0 --- /dev/null +++ b/nomad/periodic.go @@ -0,0 +1,27 @@ +package nomad + +import "github.com/hashicorp/nomad/nomad/structs" + +type PeriodicRunner interface { + SetEnabled(enabled bool) + Add(job *structs.Job) error + Remove(jobID string) error + ForceRun(jobID string) error +} + +type PeriodicDispatch struct{} + +func (p *PeriodicDispatch) SetEnabled(enabled bool) { +} + +func (p *PeriodicDispatch) Add(job *structs.Job) error { + return nil +} + +func (p *PeriodicDispatch) Remove(jobID string) error { + return nil +} + +func (p *PeriodicDispatch) ForceRun(jobID string) error { + return nil +} diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go new file mode 100644 index 000000000..539c42658 --- /dev/null +++ b/nomad/periodic_test.go @@ -0,0 +1,38 @@ +package nomad + +import ( + "fmt" + + "github.com/hashicorp/nomad/nomad/structs" +) + +type MockPeriodic struct { + Enabled bool + Jobs map[string]*structs.Job +} + +func NewMockPeriodic() *MockPeriodic { + return &MockPeriodic{Jobs: make(map[string]*structs.Job)} +} + +func (m *MockPeriodic) SetEnabled(enabled bool) { + m.Enabled = enabled +} + +func (m *MockPeriodic) Add(job *structs.Job) error { + if job == nil { + return fmt.Errorf("Must pass non nil job") + } + + m.Jobs[job.ID] = job + return nil +} + +func (m *MockPeriodic) Remove(jobID string) error { + delete(m.Jobs, jobID) + return nil +} + +func (m *MockPeriodic) ForceRun(jobID string) error { + return nil +} diff --git a/nomad/server.go b/nomad/server.go index a7236e004..826aa8458 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -113,6 +113,9 @@ type 
Server struct { // plans that are waiting to be assessed by the leader planQueue *PlanQueue + // periodicRunner is used to track and create evaluations for periodic jobs. + periodicRunner PeriodicRunner + // heartbeatTimers track the expiration time of each heartbeat that has // a TTL. On expiration, the node status is updated to be 'down'. heartbeatTimers map[string]*time.Timer @@ -406,7 +409,7 @@ func (s *Server) setupRaft() error { // Create the FSM var err error - s.fsm, err = NewFSM(s.evalBroker, s.config.LogOutput) + s.fsm, err = NewFSM(s.evalBroker, s.periodicRunner, s.config.LogOutput) if err != nil { return err } From 670cc50a0285c1aaa845a08e1991a5ce1a976a1e Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 18 Dec 2015 12:26:28 -0800 Subject: [PATCH 16/45] merge --- nomad/leader.go | 16 ++ nomad/mock/mock.go | 10 + nomad/periodic.go | 473 +++++++++++++++++++++++++++++- nomad/periodic_test.go | 492 ++++++++++++++++++++++++++++++++ nomad/server.go | 9 +- nomad/state/schema.go | 9 + nomad/state/state_store.go | 12 + nomad/state/state_store_test.go | 42 +++ nomad/structs/structs.go | 45 +++ nomad/structs/structs_test.go | 74 +++++ 10 files changed, 1177 insertions(+), 5 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index 0e267a7dc..41081dcfc 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -117,6 +117,14 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { return err } + // Enable the periodic dispatcher,since we are now the leader. + s.periodicDispatcher.SetEnabled(true) + + // Restore the periodic dispatcher state + if err := s.restorePeriodicDispatcher(); err != nil { + return err + } + // Scheduler periodic jobs go s.schedulePeriodic(stopCh) @@ -167,6 +175,11 @@ func (s *Server) restoreEvalBroker() error { return nil } +func (s *Server) restorePeriodicDispatcher() error { + // TODO(alex) + return nil +} + // schedulePeriodic is used to do periodic job dispatch while we are leader func (s *Server) schedulePeriodic(stopCh chan struct{}) { evalGC := time.NewTicker(s.config.EvalGCInterval) @@ -250,6 +263,9 @@ func (s *Server) revokeLeadership() error { // Disable the eval broker, since it is only useful as a leader s.evalBroker.SetEnabled(false) + // Disable the periodic dispatcher, since it is only useful as a leader + s.periodicDispatcher.SetEnabled(false) + // Clear the heartbeat timers on either shutdown or step down, // since we are no longer responsible for TTL expirations. if err := s.clearAllHeartbeatTimers(); err != nil { diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 3c2bb4f5e..0d61009a3 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -190,6 +190,16 @@ func SystemJob() *structs.Job { return job } +func PeriodicJob() *structs.Job { + job := Job() + job.Periodic = &structs.PeriodicConfig{ + Enabled: true, + SpecType: structs.PeriodicSpecCron, + Spec: "*/30 * * *", + } + return job +} + func Eval() *structs.Evaluation { eval := &structs.Evaluation{ ID: structs.GenerateUUID(), diff --git a/nomad/periodic.go b/nomad/periodic.go index f3d253ac0..3d527a82e 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -1,27 +1,496 @@ package nomad -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "container/heap" + "errors" + "fmt" + "log" + "sync" + "time" + "github.com/hashicorp/nomad/nomad/structs" +) + +// PeriodicRunner is the interface for tracking and launching periodic jobs at +// their periodic spec. 
type PeriodicRunner interface { + Start() SetEnabled(enabled bool) Add(job *structs.Job) error Remove(jobID string) error ForceRun(jobID string) error + Tracked() []structs.Job + Flush() } -type PeriodicDispatch struct{} +// PeriodicDispatch is used to track and launch periodic jobs. It maintains the +// set of periodic jobs and creates derived jobs and evaluations per +// instantiation which is determined by the periodic spec. +type PeriodicDispatch struct { + srv *Server + enabled bool + running bool + tracked map[string]*structs.Job + heap *periodicHeap + + updateCh chan struct{} + stopCh chan struct{} + logger *log.Logger + l sync.RWMutex +} + +// NewPeriodicDispatch returns a periodic dispatcher that is used to track and +// launch periodic jobs. +func NewPeriodicDispatch(srv *Server) *PeriodicDispatch { + return &PeriodicDispatch{ + srv: srv, + tracked: make(map[string]*structs.Job), + heap: NewPeriodicHeap(), + updateCh: make(chan struct{}, 1), + stopCh: make(chan struct{}, 1), + logger: srv.logger, + } +} + +// SetEnabled is used to control if the periodic dispatcher is enabled. It +// should only be enabled on the active leader. Disabling an active dispatcher +// will stop any launched go routine and flush the dispatcher. func (p *PeriodicDispatch) SetEnabled(enabled bool) { + p.l.Lock() + p.enabled = enabled + p.l.Unlock() + if !enabled { + p.stopCh <- struct{}{} + p.Flush() + } } +// Start begins the goroutine that creates derived jobs and evals. +func (p *PeriodicDispatch) Start() { + p.l.Lock() + p.running = true + p.l.Unlock() + go p.run() +} + +// Tracked returns the set of tracked job IDs. +func (p *PeriodicDispatch) Tracked() []structs.Job { + p.l.RLock() + defer p.l.RUnlock() + tracked := make([]structs.Job, len(p.tracked)) + i := 0 + for _, job := range p.tracked { + tracked[i] = *job + i++ + } + return tracked +} + +// Add begins tracking of a periodic job. If it is already tracked, it acts as +// an update to the jobs periodic spec. func (p *PeriodicDispatch) Add(job *structs.Job) error { + p.l.Lock() + defer p.l.Unlock() + + // Do nothing if not enabled + if !p.enabled { + return fmt.Errorf("periodic dispatch disabled") + } + + // If we were tracking a job and it has been disabled or made non-periodic remove it. + disabled := !job.IsPeriodic() || !job.Periodic.Enabled + _, tracked := p.tracked[job.ID] + if tracked && disabled { + return p.Remove(job.ID) + } + + // If the job is diabled and we aren't tracking it, do nothing. + if disabled { + return nil + } + + // Add or update the job. + p.tracked[job.ID] = job + next := job.Periodic.Next(time.Now()) + if tracked { + if err := p.heap.Update(job, next); err != nil { + return fmt.Errorf("failed to update job %v launch time: %v", job.ID, err) + } + } else { + if err := p.heap.Push(job, next); err != nil { + return fmt.Errorf("failed to add job %v", job.ID, err) + } + } + + // Signal an update. + if p.running { + select { + case p.updateCh <- struct{}{}: + default: + } + } + return nil } +// Remove stops tracking the passed job. If the job is not tracked, it is a +// no-op. func (p *PeriodicDispatch) Remove(jobID string) error { + p.l.Lock() + defer p.l.Unlock() + + // Do nothing if not enabled + if !p.enabled { + return fmt.Errorf("periodic dispatch disabled") + } + + if job, tracked := p.tracked[jobID]; tracked { + delete(p.tracked, jobID) + if err := p.heap.Remove(job); err != nil { + return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) + } + } + + // Signal an update. 
+ if p.running { + select { + case p.updateCh <- struct{}{}: + default: + } + } + return nil } +// ForceRun causes the periodic job to be evaluated immediately. func (p *PeriodicDispatch) ForceRun(jobID string) error { + p.l.Lock() + defer p.l.Unlock() + + // Do nothing if not enabled + if !p.enabled { + return fmt.Errorf("periodic dispatch disabled") + } + + job, tracked := p.tracked[jobID] + if !tracked { + return fmt.Errorf("can't force run non-tracked job %v", jobID) + } + + return p.createEval(job, time.Now()) +} + +// run is a long-lived function that waits til a job's periodic spec is met and +// then creates an evaluation to run the job. +func (p *PeriodicDispatch) run() { + // Do nothing if not enabled. + p.l.RLock() + if !p.enabled { + p.l.RUnlock() + return + } + p.l.RUnlock() + + now := time.Now().Local() + +PICK: + // If there is nothing wait for an update. + p.l.RLock() + if p.heap.Length() == 0 { + p.l.RUnlock() + <-p.updateCh + p.l.RLock() + } + + nextJob, err := p.heap.Peek() + p.l.RUnlock() + if err != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to determine next periodic job: %v", err) + return + } + + launchTime := nextJob.next + + // If there are only invalid times, wait for an update. + if launchTime.IsZero() { + <-p.updateCh + goto PICK + } + + select { + case <-p.stopCh: + return + case <-p.updateCh: + goto PICK + case <-time.After(nextJob.next.Sub(now)): + // Get the current time so that we don't miss any jobs will we are + // creating evals. + nowUpdate := time.Now() + + // Create evals for all the jobs with the same launch time. + p.l.Lock() + for { + if p.heap.Length() == 0 { + break + } + + j, err := p.heap.Peek() + if err != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to determine next periodic job: %v", err) + break + } + + if j.next != launchTime { + break + } + + if err := p.heap.Update(j.job, j.job.Periodic.Next(nowUpdate)); err != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to update next launch of periodic job: %v", j.job.ID, err) + } + + // TODO(alex): Want to be able to check that there isn't a previously + // running cron job for this job. + go p.createEval(j.job, launchTime) + } + + p.l.Unlock() + now = nowUpdate + } + + goto PICK +} + +// createEval instantiates a job based on the passed periodic job and submits an +// evaluation for it. +func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) error { + derived, err := p.deriveJob(periodicJob, time) + if err != nil { + return err + } + + // Commit this update via Raft + req := structs.JobRegisterRequest{Job: derived} + _, index, err := p.srv.raftApply(structs.JobRegisterRequestType, req) + if err != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: Register failed: %v", err) + return err + } + + // Create a new evaluation + eval := &structs.Evaluation{ + ID: structs.GenerateUUID(), + Priority: derived.Priority, + Type: derived.Type, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: derived.ID, + JobModifyIndex: index, + Status: structs.EvalStatusPending, + } + update := &structs.EvalUpdateRequest{ + Evals: []*structs.Evaluation{eval}, + } + + // Commit this evaluation via Raft + // XXX: There is a risk of partial failure where the JobRegister succeeds + // but that the EvalUpdate does not. 
+ _, _, err = p.srv.raftApply(structs.EvalUpdateRequestType, update) + if err != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: Eval create failed: %v", err) + return err + } + return nil } + +// deriveJob instantiates a new job based on the passed periodic job and the +// launch time. +// TODO: these jobs need to be marked as GC'able +func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( + derived *structs.Job, err error) { + + // Have to recover in case the job copy panics. + defer func() { + if r := recover(); r != nil { + p.logger.Printf("[ERR] nomad.periodic_dispatch: deriving job from"+ + " periodic job %v failed; deregistering from periodic runner: %v", + periodicJob.ID, r) + p.Remove(periodicJob.ID) + derived = nil + err = fmt.Errorf("Failed to create a copy of the periodic job %v: %v", periodicJob.ID, r) + } + }() + + // Create a copy of the periodic job, give it a derived ID/Name and make it + // non-periodic. + derived = periodicJob.Copy() + derived.ParentID = periodicJob.ID + derived.ID = p.derivedJobID(periodicJob, time) + derived.Name = periodicJob.ID + derived.Periodic = nil + return +} + +// deriveJobID returns a job ID based on the parent periodic job and the launch +// time. +func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { + return fmt.Sprintf("%s-%d", periodicJob.ID, time.Unix()) +} + +// CreatedEvals returns the set of evaluations created from the passed periodic +// job. +func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) ([]*structs.Evaluation, error) { + state := p.srv.fsm.State() + iter, err := state.ChildJobs(periodicJobID) + if err != nil { + return nil, fmt.Errorf("failed to look up children of job %v: %v", periodicJobID, err) + } + + var evals []*structs.Evaluation + for i := iter.Next(); i != nil; i = iter.Next() { + job := i.(*structs.Job) + childEvals, err := state.EvalsByJob(job.ID) + if err != nil { + fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) + } + + evals = append(evals, childEvals...) 
+ } + + return evals, nil +} + +// Flush clears the state of the PeriodicDispatcher +func (p *PeriodicDispatch) Flush() { + p.l.Lock() + defer p.l.Unlock() + p.stopCh = make(chan struct{}, 1) + p.updateCh = make(chan struct{}, 1) + p.tracked = make(map[string]*structs.Job) + p.heap = NewPeriodicHeap() +} + +// TODO +type periodicHeap struct { + index map[string]*periodicJob + heap periodicHeapImp +} + +type periodicJob struct { + job *structs.Job + next time.Time + index int +} + +func NewPeriodicHeap() *periodicHeap { + return &periodicHeap{ + index: make(map[string]*periodicJob), + heap: make(periodicHeapImp, 0), + } +} + +func (p *periodicHeap) Push(job *structs.Job, next time.Time) error { + if _, ok := p.index[job.ID]; ok { + return fmt.Errorf("job %v already exists", job.ID) + } + + pJob := &periodicJob{job, next, 0} + p.index[job.ID] = pJob + heap.Push(&p.heap, pJob) + return nil +} + +func (p *periodicHeap) Pop() (*periodicJob, error) { + if len(p.heap) == 0 { + return nil, errors.New("heap is empty") + } + + pJob := heap.Pop(&p.heap).(*periodicJob) + delete(p.index, pJob.job.ID) + return pJob, nil +} + +func (p *periodicHeap) Peek() (periodicJob, error) { + if len(p.heap) == 0 { + return periodicJob{}, errors.New("heap is empty") + } + + return *(p.heap[0]), nil +} + +func (p *periodicHeap) Contains(job *structs.Job) bool { + _, ok := p.index[job.ID] + return ok +} + +func (p *periodicHeap) Update(job *structs.Job, next time.Time) error { + if job, ok := p.index[job.ID]; ok { + p.heap.update(job, next) + return nil + } + + return fmt.Errorf("heap doesn't contain job %v", job.ID) +} + +func (p *periodicHeap) Remove(job *structs.Job) error { + if pJob, ok := p.index[job.ID]; ok { + heap.Remove(&p.heap, pJob.index) + delete(p.index, job.ID) + return nil + } + + return fmt.Errorf("heap doesn't contain job %v", job.ID) +} + +func (p *periodicHeap) Length() int { + return len(p.heap) +} + +type periodicHeapImp []*periodicJob + +func (h periodicHeapImp) Len() int { return len(h) } + +func (h periodicHeapImp) Less(i, j int) bool { + // Two zero times should return false. + // Otherwise, zero is "greater" than any other time. + // (To sort it at the end of the list.) + // Sort such that zero times are at the end of the list. + iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() + if iZero && jZero { + return false + } else if iZero { + return false + } else if jZero { + return true + } + + return h[i].next.Before(h[j].next) +} + +func (h periodicHeapImp) Swap(i, j int) { + h[i], h[j] = h[j], h[i] + h[i].index = i + h[j].index = j +} + +func (h *periodicHeapImp) Push(x interface{}) { + n := len(*h) + job := x.(*periodicJob) + job.index = n + *h = append(*h, job) +} + +func (h *periodicHeapImp) Pop() interface{} { + old := *h + n := len(old) + job := old[n-1] + job.index = -1 // for safety + *h = old[0 : n-1] + return job +} + +// update modifies the priority and next time of an periodic job in the queue. 
+func (h *periodicHeapImp) update(job *periodicJob, next time.Time) { + job.next = next + heap.Fix(h, job.index) +} diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index 539c42658..f199038f3 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -2,10 +2,20 @@ package nomad import ( "fmt" + "math/rand" + "reflect" + "strconv" + "strings" + "testing" + "time" + "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" ) +// MockPeriodic can be used by other tests that want to mock the periodic +// dispatcher. type MockPeriodic struct { Enabled bool Jobs map[string]*structs.Job @@ -36,3 +46,485 @@ func (m *MockPeriodic) Remove(jobID string) error { func (m *MockPeriodic) ForceRun(jobID string) error { return nil } + +func (m *MockPeriodic) Start() {} + +func (m *MockPeriodic) Flush() { + m.Jobs = make(map[string]*structs.Job) +} + +func (m *MockPeriodic) Tracked() []structs.Job { + tracked := make([]structs.Job, len(m.Jobs)) + i := 0 + for _, job := range m.Jobs { + tracked[i] = *job + i++ + } + return tracked +} + +func TestPeriodicDispatch_DisabledOperations(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Disable the dispatcher. + s1.periodicDispatcher.SetEnabled(false) + + job := mock.PeriodicJob() + if err := s1.periodicDispatcher.Add(job); err == nil { + t.Fatalf("Add on disabled dispatcher should fail") + } + + if err := s1.periodicDispatcher.Remove(job.ID); err == nil { + t.Fatalf("Remove on disabled dispatcher should fail") + } +} + +func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + job := mock.Job() + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add of non-periodic job failed: %v; expect no-op", err) + } + + tracked := s1.periodicDispatcher.Tracked() + if len(tracked) != 0 { + t.Fatalf("Add of non-periodic job should be no-op: %v", tracked) + } +} + +func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + job := mock.PeriodicJob() + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + tracked := s1.periodicDispatcher.Tracked() + if len(tracked) != 1 { + t.Fatalf("Add didn't track the job: %v", tracked) + } + + // Update the job and add it again. + job.Periodic.Spec = "foo" + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + tracked = s1.periodicDispatcher.Tracked() + if len(tracked) != 1 { + t.Fatalf("Add didn't update: %v", tracked) + } + + if !reflect.DeepEqual(*job, tracked[0]) { + t.Fatalf("Add didn't properly update: got %v; want %v", tracked[0], job) + } +} + +func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create a job that won't be evalauted for a while. + job := testPeriodicJob(time.Now().Add(10 * time.Second)) + + // Add it. 
+ if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + // Update it to be sooner and re-add. + expected := time.Now().Add(1 * time.Second) + job.Periodic.Spec = fmt.Sprintf("%d", expected.Unix()) + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + time.Sleep(2 * time.Second) + + // Check that an eval was created for the right time. + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + if len(evals) != 1 { + t.Fatalf("Unexpected number of evals created; got %#v; want 1", evals) + } + + eval := evals[0] + expID := s1.periodicDispatcher.derivedJobID(job, expected) + if eval.JobID != expID { + t.Fatalf("periodic dispatcher created eval at the wrong time; got %v; want %v", + eval.JobID, expID) + } +} + +func TestPeriodicDispatch_Remove_Untracked(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + if err := s1.periodicDispatcher.Remove("foo"); err != nil { + t.Fatalf("Remove failed %v; expected a no-op", err) + } +} + +func TestPeriodicDispatch_Remove_Tracked(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + job := mock.PeriodicJob() + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + tracked := s1.periodicDispatcher.Tracked() + if len(tracked) != 1 { + t.Fatalf("Add didn't track the job: %v", tracked) + } + + if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + t.Fatalf("Remove failed %v", err) + } + + tracked = s1.periodicDispatcher.Tracked() + if len(tracked) != 0 { + t.Fatalf("Remove didn't untrack the job: %v", tracked) + } +} + +func TestPeriodicDispatch_Remove_TriggersUpdate(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create a job that will be evaluated soon. + job := testPeriodicJob(time.Now().Add(1 * time.Second)) + + // Add it. + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + // Remove the job. + if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + t.Fatalf("Add failed %v", err) + } + + time.Sleep(2 * time.Second) + + // Check that an eval wasn't created. + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + if len(evals) != 0 { + t.Fatalf("Remove didn't cancel creation of an eval: %#v", evals) + } +} + +func TestPeriodicDispatch_ForceRun_Untracked(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + if err := s1.periodicDispatcher.ForceRun("foo"); err == nil { + t.Fatal("ForceRun of untracked job should fail") + } +} + +func TestPeriodicDispatch_ForceRun_Tracked(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create a job that won't be evalauted for a while. 
+ job := testPeriodicJob(time.Now().Add(10 * time.Second)) + + // Add it. + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + // ForceRun the job + if err := s1.periodicDispatcher.ForceRun(job.ID); err != nil { + t.Fatalf("ForceRun failed %v", err) + } + + // Check that an eval was created for the right time. + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + if len(evals) != 1 { + t.Fatalf("Unexpected number of evals created; got %#v; want 1", evals) + } +} + +func TestPeriodicDispatch_Run_Multiple(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create a job that will be launched twice. + launch1 := time.Now().Add(1 * time.Second) + launch2 := time.Now().Add(2 * time.Second) + job := testPeriodicJob(launch1, launch2) + + // Add it. + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + time.Sleep(3 * time.Second) + + // Check that the evals were created correctly + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + d := s1.periodicDispatcher + expected := []string{d.derivedJobID(job, launch1), d.derivedJobID(job, launch2)} + for i, eval := range evals { + if eval.JobID != expected[i] { + t.Fatalf("eval created incorrectly; got %v; want %v", eval.JobID, expected[i]) + } + } +} + +func TestPeriodicDispatch_Run_SameTime(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create two job that will be launched at the same time. + launch := time.Now().Add(1 * time.Second) + job := testPeriodicJob(launch) + job2 := testPeriodicJob(launch) + + // Add them. + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + if err := s1.periodicDispatcher.Add(job2); err != nil { + t.Fatalf("Add failed %v", err) + } + + time.Sleep(2 * time.Second) + + // Check that the evals were created correctly + for _, job := range []*structs.Job{job, job2} { + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + if len(evals) != 1 { + t.Fatalf("expected 1 eval; got %#v", evals) + } + + d := s1.periodicDispatcher + expected := d.derivedJobID(job, launch) + if evals[0].JobID != expected { + t.Fatalf("eval created incorrectly; got %v; want %v", evals[0].JobID, expected) + } + } +} + +// This test adds and removes a bunch of jobs, some launching at the same time, +// some after each other and some invalid times, and ensures the correct +// behavior. +func TestPeriodicDispatch_Complex(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Start the periodic dispatcher. + s1.periodicDispatcher.Start() + + // Create some jobs launching at different times. 
+ now := time.Now() + same := now.Add(1 * time.Second) + launch1 := same.Add(1 * time.Second) + launch2 := same.Add(1500 * time.Millisecond) + launch3 := same.Add(2 * time.Second) + invalid := now.Add(-200 * time.Second) + + // Create two jobs launching at the same time. + job1 := testPeriodicJob(same) + job2 := testPeriodicJob(same) + + // Create a job that will never launch. + job3 := testPeriodicJob(invalid) + + // Create a job that launches twice. + job4 := testPeriodicJob(launch1, launch3) + + // Create a job that launches once. + job5 := testPeriodicJob(launch2) + + // Create 3 jobs we will delete. + job6 := testPeriodicJob(same) + job7 := testPeriodicJob(launch1, launch3) + job8 := testPeriodicJob(launch2) + + // Create a map of expected eval job ids. + d := s1.periodicDispatcher + expected := map[string][]string{ + job1.ID: []string{d.derivedJobID(job1, same)}, + job2.ID: []string{d.derivedJobID(job2, same)}, + job3.ID: nil, + job4.ID: []string{ + d.derivedJobID(job4, launch1), + d.derivedJobID(job4, launch3), + }, + job5.ID: []string{d.derivedJobID(job5, launch2)}, + job6.ID: nil, + job7.ID: nil, + job8.ID: nil, + } + + // Shuffle the jobs so they can be added randomly + jobs := []*structs.Job{job1, job2, job3, job4, job5, job6, job7, job8} + toDelete := []*structs.Job{job6, job7, job8} + shuffle(jobs) + shuffle(toDelete) + + for _, job := range jobs { + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + } + + for _, job := range toDelete { + if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + t.Fatalf("Remove failed %v", err) + } + } + + time.Sleep(4 * time.Second) + actual := make(map[string][]string, len(expected)) + for _, job := range jobs { + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + } + + var jobs []string + for _, eval := range evals { + jobs = append(jobs, eval.JobID) + } + actual[job.ID] = jobs + } + + if !reflect.DeepEqual(actual, expected) { + t.Fatalf("Unexpected evals; got %#v; want %#v", actual, expected) + } +} + +func shuffle(jobs []*structs.Job) { + rand.Seed(time.Now().Unix()) + for i := range jobs { + j := rand.Intn(len(jobs)) + jobs[i], jobs[j] = jobs[j], jobs[i] + } +} + +func testPeriodicJob(times ...time.Time) *structs.Job { + job := mock.PeriodicJob() + job.Periodic.SpecType = structs.PeriodicSpecTest + + l := make([]string, len(times)) + for i, t := range times { + l[i] = strconv.Itoa(int(t.Unix())) + } + + job.Periodic.Spec = strings.Join(l, ",") + return job +} + +// TODO: Check that it doesn't create evals for overlapping things. 
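The "test" spec type used by testPeriodicJob encodes each desired launch as a sorted, comma-separated list of Unix timestamps. A minimal sketch of walking that schedule with PeriodicConfig.Next is shown below; the test name and local variables are illustrative only and assume the PeriodicSpecTest handling added to structs.go in this patch.

func TestPeriodicSpecTest_Walk(t *testing.T) {
	// Two launches, one and two seconds out, rounded so the encoded Unix
	// seconds reconstruct the exact instants.
	launch1 := time.Now().Add(1 * time.Second).Round(time.Second)
	launch2 := launch1.Add(1 * time.Second)
	job := testPeriodicJob(launch1, launch2)

	// Next returns the earliest encoded time strictly after its argument.
	if next := job.Periodic.Next(time.Now()); !next.Equal(launch1) {
		t.Fatalf("got %v; want %v", next, launch1)
	}
	if next := job.Periodic.Next(launch1); !next.Equal(launch2) {
		t.Fatalf("got %v; want %v", next, launch2)
	}

	// Once the list is exhausted the zero time.Time is returned.
	if next := job.Periodic.Next(launch2); !next.IsZero() {
		t.Fatalf("expected zero time after the last launch, got %v", next)
	}
}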
+ +func TestPeriodicHeap_Order(t *testing.T) { + h := NewPeriodicHeap() + j1 := mock.PeriodicJob() + j2 := mock.PeriodicJob() + j3 := mock.PeriodicJob() + + lookup := map[*structs.Job]string{ + j1: "j1", + j2: "j2", + j3: "j3", + } + + h.Push(j1, time.Time{}) + h.Push(j2, time.Unix(10, 0)) + h.Push(j3, time.Unix(11, 0)) + + exp := []string{"j2", "j3", "j1"} + var act []string + for i := 0; i < 3; i++ { + pJob, err := h.Pop() + if err != nil { + t.Fatal(err) + } + + act = append(act, lookup[pJob.job]) + } + + if !reflect.DeepEqual(act, exp) { + t.Fatalf("Wrong ordering; got %v; want %v", act, exp) + } +} diff --git a/nomad/server.go b/nomad/server.go index 826aa8458..3a1c25d83 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -113,8 +113,8 @@ type Server struct { // plans that are waiting to be assessed by the leader planQueue *PlanQueue - // periodicRunner is used to track and create evaluations for periodic jobs. - periodicRunner PeriodicRunner + // periodicDispatcher is used to track and create evaluations for periodic jobs. + periodicDispatcher *PeriodicDispatch // heartbeatTimers track the expiration time of each heartbeat that has // a TTL. On expiration, the node status is updated to be 'down'. @@ -184,6 +184,9 @@ func NewServer(config *Config) (*Server, error) { shutdownCh: make(chan struct{}), } + // Create the periodic dispatcher for launching periodic jobs. + s.periodicDispatcher = NewPeriodicDispatch(s) + // Initialize the RPC layer // TODO: TLS... if err := s.setupRPC(nil); err != nil { @@ -409,7 +412,7 @@ func (s *Server) setupRaft() error { // Create the FSM var err error - s.fsm, err = NewFSM(s.evalBroker, s.periodicRunner, s.config.LogOutput) + s.fsm, err = NewFSM(s.evalBroker, s.periodicDispatcher, s.config.LogOutput) if err != nil { return err } diff --git a/nomad/state/schema.go b/nomad/state/schema.go index 961cb67a7..f0ee0fd75 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -109,6 +109,15 @@ func jobTableSchema() *memdb.TableSchema { Conditional: jobIsGCable, }, }, + "parent": &memdb.IndexSchema{ + Name: "parent", + AllowMissing: true, + Unique: false, + Indexer: &memdb.StringFieldIndex{ + Field: "ParentID", + Lowercase: true, + }, + }, }, } } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 7eef401ec..25c5151f6 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -359,6 +359,18 @@ func (s *StateStore) Jobs() (memdb.ResultIterator, error) { return iter, nil } +// ChildJobs returns an iterator over all the children of the passed job. +func (s *StateStore) ChildJobs(id string) (memdb.ResultIterator, error) { + txn := s.db.Txn(false) + + // Scan all jobs whose parent is the passed id. + iter, err := txn.Get("jobs", "parent", id) + if err != nil { + return nil, err + } + return iter, nil +} + // JobsByScheduler returns an iterator over all the jobs with the specific // scheduler type. 
func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 0609f3048..23331ee57 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -404,6 +404,48 @@ func TestStateStore_Jobs(t *testing.T) { } } +func TestStateStore_ChildJobs(t *testing.T) { + state := testStateStore(t) + parent := mock.Job() + var childJobs []*structs.Job + + if err := state.UpsertJob(999, parent); err != nil { + t.Fatalf("err: %v", err) + } + + for i := 0; i < 10; i++ { + job := mock.Job() + job.ParentID = parent.ID + childJobs = append(childJobs, job) + + err := state.UpsertJob(1000+uint64(i), job) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + iter, err := state.ChildJobs(parent.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + var out []*structs.Job + for { + raw := iter.Next() + if raw == nil { + break + } + out = append(out, raw.(*structs.Job)) + } + + sort.Sort(JobIDSort(childJobs)) + sort.Sort(JobIDSort(out)) + + if !reflect.DeepEqual(childJobs, out) { + t.Fatalf("bad: %#v %#v", childJobs, out) + } +} + func TestStateStore_JobsByScheduler(t *testing.T) { state := testStateStore(t) var serviceJobs []*structs.Job diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index bed272309..db1fd8c79 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -8,6 +8,7 @@ import ( "io" "reflect" "regexp" + "strconv" "strings" "time" @@ -16,6 +17,7 @@ import ( "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/helper/args" + "github.com/mitchellh/copystructure" ) var ( @@ -719,6 +721,9 @@ type Job struct { // specified hierarchically like LineOfBiz/OrgName/Team/Project ID string + // ParentID is the unique identifier of the job that spawned this job. + ParentID string + // Name is the logical name of the job used to refer to it. This is unique // per region, but not unique globally. Name string @@ -787,6 +792,17 @@ func (j *Job) InitFields() { } } +// Copy returns a deep copy of the Job. It is expected that callers use recover. +// This job can panic if the deep copy failed as it uses reflection. +func (j *Job) Copy() *Job { + i, err := copystructure.Copy(j) + if err != nil { + panic(err) + } + + return i.(*Job) +} + // Validate is used to sanity check a job input func (j *Job) Validate() error { var mErr multierror.Error @@ -914,6 +930,10 @@ func (u *UpdateStrategy) Rolling() bool { const ( // PeriodicSpecCron is used for a cron spec. PeriodicSpecCron = "cron" + + // PeriodicSpecTest is only used by unit tests. It is a sorted, comma + // seperated list of unix timestamps at which to launch. + PeriodicSpecTest = "test" ) // Periodic defines the interval a job should be run at. 
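Because Job.Copy panics rather than returning an error when the reflection-based deep copy fails, callers are expected to guard it with recover, as the periodic dispatcher's deriveJob does later in this series. A hedged sketch of that pattern follows; safeCopy is an illustrative name, not an API added by these patches.

// safeCopy wraps Job.Copy and converts a panic from a failed deep copy
// into an ordinary error, mirroring the recover pattern deriveJob uses.
func safeCopy(j *structs.Job) (copied *structs.Job, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("deep copy of job %q failed: %v", j.ID, r)
		}
	}()
	copied = j.Copy()
	return copied, nil
}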
@@ -944,6 +964,8 @@ func (p *PeriodicConfig) Validate() error { if _, err := cronexpr.Parse(p.Spec); err != nil { return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err) } + case PeriodicSpecTest: + // No-op default: return fmt.Errorf("Unknown specification type %q", p.SpecType) } @@ -961,6 +983,29 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { if e, err := cronexpr.Parse(p.Spec); err == nil { return e.Next(fromTime) } + case PeriodicSpecTest: + split := strings.Split(p.Spec, ",") + if len(split) == 1 && split[0] == "" { + return time.Time{} + } + + // Parse the times + times := make([]time.Time, len(split)) + for i, s := range split { + unix, err := strconv.Atoi(s) + if err != nil { + return time.Time{} + } + + times[i] = time.Unix(int64(unix), 0) + } + + // Find the next match + for _, next := range times { + if fromTime.Before(next) { + return next + } + } } return time.Time{} diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 2b74c3f1a..22e5dfea9 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -93,6 +93,80 @@ func TestJob_Validate(t *testing.T) { } } +func TestJob_Copy(t *testing.T) { + j := &Job{ + Region: "global", + ID: GenerateUUID(), + Name: "my-job", + Type: JobTypeService, + Priority: 50, + AllAtOnce: false, + Datacenters: []string{"dc1"}, + Constraints: []*Constraint{ + &Constraint{ + LTarget: "$attr.kernel.name", + RTarget: "linux", + Operand: "=", + }, + }, + Periodic: &PeriodicConfig{ + Enabled: false, + }, + TaskGroups: []*TaskGroup{ + &TaskGroup{ + Name: "web", + Count: 10, + RestartPolicy: &RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + }, + Tasks: []*Task{ + &Task{ + Name: "web", + Driver: "exec", + Config: map[string]interface{}{ + "command": "/bin/date", + }, + Env: map[string]string{ + "FOO": "bar", + }, + Services: []*Service{ + { + Name: "${TASK}-frontend", + PortLabel: "http", + }, + }, + Resources: &Resources{ + CPU: 500, + MemoryMB: 256, + Networks: []*NetworkResource{ + &NetworkResource{ + MBits: 50, + DynamicPorts: []Port{{Label: "http"}}, + }, + }, + }, + }, + }, + Meta: map[string]string{ + "elb_check_type": "http", + "elb_check_interval": "30s", + "elb_check_min": "3", + }, + }, + }, + Meta: map[string]string{ + "owner": "armon", + }, + } + + c := j.Copy() + if !reflect.DeepEqual(j, c) { + t.Fatalf("Copy() returned an unequal Job; got %v; want %v", c, j) + } +} + func TestJob_IsPeriodic(t *testing.T) { j := &Job{ Type: JobTypeService, From 9a7b363b94d9492cc333ed60f83e1984ce03ea73 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Dec 2015 09:49:42 -0800 Subject: [PATCH 17/45] Add periodic index to job table --- nomad/state/schema.go | 8 ++++ nomad/state/state_store.go | 12 ++++++ nomad/state/state_store_test.go | 66 +++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/nomad/state/schema.go b/nomad/state/schema.go index f0ee0fd75..d3119016b 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -118,6 +118,14 @@ func jobTableSchema() *memdb.TableSchema { Lowercase: true, }, }, + "periodic": &memdb.IndexSchema{ + Name: "periodic", + AllowMissing: false, + Unique: false, + Indexer: &memdb.FieldSetIndex{ + Field: "Periodic", + }, + }, }, } } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 25c5151f6..f0953d403 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -371,6 +371,18 @@ func (s *StateStore) ChildJobs(id string) (memdb.ResultIterator, 
error) { return iter, nil } +// JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. +func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { + txn := s.db.Txn(false) + + // Scan all jobs whose parent is the passed id. + iter, err := txn.Get("jobs", "periodic", periodic) + if err != nil { + return nil, err + } + return iter, nil +} + // JobsByScheduler returns an iterator over all the jobs with the specific // scheduler type. func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 23331ee57..a1504e936 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -446,6 +446,72 @@ func TestStateStore_ChildJobs(t *testing.T) { } } +func TestStateStore_JobsByPeriodic(t *testing.T) { + state := testStateStore(t) + var periodic, nonPeriodic []*structs.Job + + for i := 0; i < 10; i++ { + job := mock.Job() + nonPeriodic = append(nonPeriodic, job) + + err := state.UpsertJob(1000+uint64(i), job) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + for i := 0; i < 10; i++ { + job := mock.PeriodicJob() + periodic = append(periodic, job) + + err := state.UpsertJob(2000+uint64(i), job) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + iter, err := state.JobsByPeriodic(true) + if err != nil { + t.Fatalf("err: %v", err) + } + + var outPeriodic []*structs.Job + for { + raw := iter.Next() + if raw == nil { + break + } + outPeriodic = append(outPeriodic, raw.(*structs.Job)) + } + + iter, err = state.JobsByPeriodic(false) + if err != nil { + t.Fatalf("err: %v", err) + } + + var outNonPeriodic []*structs.Job + for { + raw := iter.Next() + if raw == nil { + break + } + outNonPeriodic = append(outNonPeriodic, raw.(*structs.Job)) + } + + sort.Sort(JobIDSort(periodic)) + sort.Sort(JobIDSort(nonPeriodic)) + sort.Sort(JobIDSort(outPeriodic)) + sort.Sort(JobIDSort(outNonPeriodic)) + + if !reflect.DeepEqual(periodic, outPeriodic) { + t.Fatalf("bad: %#v %#v", periodic, outPeriodic) + } + + if !reflect.DeepEqual(nonPeriodic, outNonPeriodic) { + t.Fatalf("bad: %#v %#v", nonPeriodic, outNonPeriodic) + } +} + func TestStateStore_JobsByScheduler(t *testing.T) { state := testStateStore(t) var serviceJobs []*structs.Job From f6769c3d964875466ba2a6f24d8f860a92393181 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Dec 2015 15:10:08 -0800 Subject: [PATCH 18/45] Leader election restore, add structs to api jobs --- api/jobs.go | 10 ++- nomad/leader.go | 62 ++++++++++++- nomad/leader_test.go | 184 +++++++++++++++++++++++++++++++++++++++ nomad/periodic.go | 94 ++++++++++++++++---- nomad/periodic_test.go | 60 ++++++++++--- nomad/structs/structs.go | 12 ++- nomad/timetable.go | 2 +- 7 files changed, 387 insertions(+), 37 deletions(-) diff --git a/api/jobs.go b/api/jobs.go index 17e75daff..3eea39daf 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -101,12 +101,19 @@ func (j *Jobs) ForceEvaluate(jobID string, q *WriteOptions) (string, *WriteMeta, return resp.EvalID, wm, nil } -//UpdateStrategy is for serializing update strategy for a job. +// UpdateStrategy is for serializing update strategy for a job. type UpdateStrategy struct { Stagger time.Duration MaxParallel int } +// PeriodicConfig is for serializing periodic config for a job. +type PeriodicConfig struct { + Enabled bool + Spec string + SpecType string +} + // Job is used to serialize a job. 
type Job struct { Region string @@ -119,6 +126,7 @@ type Job struct { Constraints []*Constraint TaskGroups []*TaskGroup Update *UpdateStrategy + Periodic *PeriodicConfig Meta map[string]string Status string StatusDescription string diff --git a/nomad/leader.go b/nomad/leader.go index 41081dcfc..948ca219f 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -117,8 +117,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { return err } - // Enable the periodic dispatcher,since we are now the leader. + // Enable the periodic dispatcher, since we are now the leader. s.periodicDispatcher.SetEnabled(true) + s.periodicDispatcher.Start() // Restore the periodic dispatcher state if err := s.restorePeriodicDispatcher(); err != nil { @@ -175,8 +176,65 @@ func (s *Server) restoreEvalBroker() error { return nil } +// restorePeriodicDispatcher is used to restore all periodic jobs into the +// periodic dispatcher. It also determines if a periodic job should have been +// created during the leadership transition and force runs them. The periodic +// dispatcher is maintained only by the leader, so it must be restored anytime a +// leadership transition takes place. func (s *Server) restorePeriodicDispatcher() error { - // TODO(alex) + iter, err := s.fsm.State().JobsByPeriodic(true) + if err != nil { + return fmt.Errorf("failed to get periodic jobs: %v", err) + } + + now := time.Now() + var last time.Time + for i := iter.Next(); i != nil; i = iter.Next() { + job := i.(*structs.Job) + s.periodicDispatcher.Add(job) + + // Need to force run the job if an evaluation should have been created + // during the leader election period. At a high-level the logic to + // determine whether to force run a job is split based on whether an + // eval has been created for it. If so we check that since the last + // eval, should there have been a launch. If there is no eval, we check + // if there should have been a launch since the job was inserted. + evals, err := s.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + return fmt.Errorf("failed to get the evals created for periodic job %v: %v", + job.ID, err) + } + + // Determine if we need to force run by checking if a run should have + // occured since the last eval + if l := len(evals); l != 0 { + last = evals[l-1].JobLaunch + if !job.Periodic.Next(last).Before(now) { + continue + } + + goto FORCE + } + + // Determine if we need to force run by checking if a run should have + // occured since the job was added. 
+ last = s.fsm.TimeTable().NearestTime(job.ModifyIndex) + // TODO(alex): Think about the 0 time case + if !job.Periodic.Next(last).Before(now) { + continue + } + + FORCE: + if err := s.periodicDispatcher.ForceRun(job.ID); err != nil { + s.logger.Printf( + "[ERR] nomad.periodic: force run of periodic job %q failed: %v", + job.ID, err) + return fmt.Errorf("failed for force run periodic job %q: %v", job.ID, err) + } + s.logger.Printf("[DEBUG] nomad.periodic: periodic job %q force"+ + " run during leadership establishment", job.ID) + } + return nil } diff --git a/nomad/leader_test.go b/nomad/leader_test.go index b753b41f4..13509d66c 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -286,6 +286,190 @@ func TestLeader_EvalBroker_Reset(t *testing.T) { }) } +func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer s1.Shutdown() + + s2 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 + c.DevDisableBootstrap = true + }) + defer s2.Shutdown() + + s3 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 + c.DevDisableBootstrap = true + }) + defer s3.Shutdown() + servers := []*Server{s1, s2, s3} + testJoin(t, s1, s2, s3) + testutil.WaitForLeader(t, s1.RPC) + + for _, s := range servers { + testutil.WaitForResult(func() (bool, error) { + peers, _ := s.raftPeers.Peers() + return len(peers) == 3, nil + }, func(err error) { + t.Fatalf("should have 3 peers") + }) + } + + var leader *Server + for _, s := range servers { + if s.IsLeader() { + leader = s + break + } + } + if leader == nil { + t.Fatalf("Should have a leader") + } + + // Inject a periodic job and non-periodic job + periodic := mock.PeriodicJob() + nonPeriodic := mock.Job() + for _, job := range []*structs.Job{nonPeriodic, periodic} { + req := structs.JobRegisterRequest{ + Job: job, + } + _, _, err := leader.raftApply(structs.JobRegisterRequestType, req) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + // Kill the leader + leader.Shutdown() + time.Sleep(100 * time.Millisecond) + + // Wait for a new leader + leader = nil + testutil.WaitForResult(func() (bool, error) { + for _, s := range servers { + if s.IsLeader() { + leader = s + return true, nil + } + } + return false, nil + }, func(err error) { + t.Fatalf("should have leader") + }) + + // Check that the new leader is tracking the periodic job. + testutil.WaitForResult(func() (bool, error) { + _, tracked := leader.periodicDispatcher.tracked[periodic.ID] + return tracked, nil + }, func(err error) { + t.Fatalf("periodic job not tracked") + }) +} + +func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer s1.Shutdown() + + // Inject a periodic job that will be triggered soon. + launch := time.Now().Add(1 * time.Second) + job := testPeriodicJob(launch) + req := structs.JobRegisterRequest{ + Job: job, + } + _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Flush the periodic dispatcher, ensuring that no evals will be created. + s1.periodicDispatcher.Flush() + + // Sleep till after the job should have been launched. + time.Sleep(2 * time.Second) + + // Restore the periodic dispatcher. + s1.restorePeriodicDispatcher() + + // Ensure the job is tracked. + if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked { + t.Fatalf("periodic job not restored") + } + + // Check that an eval was made. 
+ evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) + } + + if len(evals) != 1 { + t.Fatalf("restorePeriodicDispatcher() didn't create an eval") + } +} + +func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer s1.Shutdown() + + // Inject a periodic job that triggered once in the past, should trigger now + // and once in the future. + now := time.Now() + past := now.Add(-1 * time.Second) + future := now.Add(10 * time.Second) + job := testPeriodicJob(past, now, future) + req := structs.JobRegisterRequest{ + Job: job, + } + _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Create an eval for the past launch. + s1.periodicDispatcher.createEval(job, past) + + // Flush the periodic dispatcher, ensuring that no evals will be created. + s1.periodicDispatcher.Flush() + + // Sleep till after the job should have been launched. + time.Sleep(2 * time.Second) + + // Restore the periodic dispatcher. + s1.restorePeriodicDispatcher() + + // Ensure the job is tracked. + if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked { + t.Fatalf("periodic job not restored") + } + + // Check that an eval was made. + evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) + } + + if len(evals) != 2 { + t.Fatalf("restorePeriodicDispatcher() didn't create an eval") + } + + // Check it was for the right time. + match := false + for _, eval := range evals { + if eval.JobLaunch != past && eval.JobLaunch != future { + match = true + break + } + } + + if !match { + t.Fatal("restorePeriodicDispatcher() didn't create the correct eval") + } +} + func TestLeader_PeriodicDispatch(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 diff --git a/nomad/periodic.go b/nomad/periodic.go index 3d527a82e..e4e2bfb02 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -5,12 +5,21 @@ import ( "errors" "fmt" "log" + "sort" + "strconv" + "strings" "sync" "time" "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // The string appended to the periodic jobs ID when launching derived + // instances of it. + JobLaunchSuffix = "-launch-" +) + // PeriodicRunner is the interface for tracking and launching periodic jobs at // their periodic spec. type PeriodicRunner interface { @@ -117,10 +126,12 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { if err := p.heap.Update(job, next); err != nil { return fmt.Errorf("failed to update job %v launch time: %v", job.ID, err) } + p.logger.Printf("[DEBUG] nomad.periodic: updated periodic job %q", job.ID) } else { if err := p.heap.Push(job, next); err != nil { return fmt.Errorf("failed to add job %v", job.ID, err) } + p.logger.Printf("[DEBUG] nomad.periodic: registered periodic job %q", job.ID) } // Signal an update. @@ -160,6 +171,7 @@ func (p *PeriodicDispatch) Remove(jobID string) error { } } + p.logger.Printf("[DEBUG] nomad.periodic: deregistered periodic job %q", jobID) return nil } @@ -192,13 +204,14 @@ func (p *PeriodicDispatch) run() { } p.l.RUnlock() - now := time.Now().Local() + var now time.Time PICK: // If there is nothing wait for an update. 
p.l.RLock() if p.heap.Length() == 0 { p.l.RUnlock() + p.logger.Printf("[DEBUG] nomad.periodic: no periodic jobs; waiting") <-p.updateCh p.l.RLock() } @@ -206,7 +219,7 @@ PICK: nextJob, err := p.heap.Peek() p.l.RUnlock() if err != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to determine next periodic job: %v", err) + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) return } @@ -214,10 +227,15 @@ PICK: // If there are only invalid times, wait for an update. if launchTime.IsZero() { + p.logger.Printf("[DEBUG] nomad.periodic: job %q has no valid launch time", nextJob.job.ID) <-p.updateCh goto PICK } + now = time.Now() + p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", + nextJob.job.ID, nextJob.next.Sub(now)) + select { case <-p.stopCh: return @@ -237,7 +255,7 @@ PICK: j, err := p.heap.Peek() if err != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to determine next periodic job: %v", err) + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) break } @@ -246,11 +264,10 @@ PICK: } if err := p.heap.Update(j.job, j.job.Periodic.Next(nowUpdate)); err != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: failed to update next launch of periodic job: %v", j.job.ID, err) + p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", j.job.ID, err) } - // TODO(alex): Want to be able to check that there isn't a previously - // running cron job for this job. + p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", j.job.ID, launchTime) go p.createEval(j.job, launchTime) } @@ -273,7 +290,7 @@ func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) req := structs.JobRegisterRequest{Job: derived} _, index, err := p.srv.raftApply(structs.JobRegisterRequestType, req) if err != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: Register failed: %v", err) + p.logger.Printf("[ERR] nomad.periodic: registering child job for periodic job %q failed: %v", periodicJob.ID, err) return err } @@ -296,7 +313,7 @@ func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) // but that the EvalUpdate does not. _, _, err = p.srv.raftApply(structs.EvalUpdateRequestType, update) if err != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: Eval create failed: %v", err) + p.logger.Printf("[ERR] nomad.periodic: creating eval for %q failed: %v", derived.ID, err) return err } @@ -312,7 +329,7 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( // Have to recover in case the job copy panics. defer func() { if r := recover(); r != nil { - p.logger.Printf("[ERR] nomad.periodic_dispatch: deriving job from"+ + p.logger.Printf("[ERR] nomad.periodic: deriving job from"+ " periodic job %v failed; deregistering from periodic runner: %v", periodicJob.ID, r) p.Remove(periodicJob.ID) @@ -326,7 +343,7 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( derived = periodicJob.Copy() derived.ParentID = periodicJob.ID derived.ID = p.derivedJobID(periodicJob, time) - derived.Name = periodicJob.ID + derived.Name = derived.ID derived.Periodic = nil return } @@ -334,32 +351,77 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( // deriveJobID returns a job ID based on the parent periodic job and the launch // time. 
func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { - return fmt.Sprintf("%s-%d", periodicJob.ID, time.Unix()) + return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.Unix()) } // CreatedEvals returns the set of evaluations created from the passed periodic -// job. -func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) ([]*structs.Evaluation, error) { +// job in sorted order, with the earliest job launch first. +func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) (PeriodicEvals, error) { state := p.srv.fsm.State() iter, err := state.ChildJobs(periodicJobID) if err != nil { return nil, fmt.Errorf("failed to look up children of job %v: %v", periodicJobID, err) } - var evals []*structs.Evaluation + var evals PeriodicEvals for i := iter.Next(); i != nil; i = iter.Next() { job := i.(*structs.Job) childEvals, err := state.EvalsByJob(job.ID) if err != nil { - fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) + return nil, fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) } - evals = append(evals, childEvals...) + for _, eval := range childEvals { + launch, err := p.evalLaunchTime(eval) + if err != nil { + return nil, fmt.Errorf("failed to get launch time for eval %v: %v", eval, err) + } + + pEval := &PeriodicEval{ + Eval: eval, + JobLaunch: launch, + } + + evals = append(evals, pEval) + } } + // Return the sorted evals. + sort.Sort(evals) return evals, nil } +// PeriodicEval stores the evaluation and launch time for an instantiated +// periodic job. +type PeriodicEval struct { + Eval *structs.Evaluation + JobLaunch time.Time +} + +type PeriodicEvals []*PeriodicEval + +func (p PeriodicEvals) Len() int { return len(p) } +func (p PeriodicEvals) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p PeriodicEvals) Less(i, j int) bool { return p[i].JobLaunch.Before(p[j].JobLaunch) } + +// evalLaunchTime returns the launch time of the job associated with the eval. +// This is only valid for evaluations created by PeriodicDispatch and will +// otherwise return an error. 
+func (p *PeriodicDispatch) evalLaunchTime(created *structs.Evaluation) (time.Time, error) { + jobID := created.JobID + index := strings.LastIndex(jobID, JobLaunchSuffix) + if index == -1 { + return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) + } + + launch, err := strconv.Atoi(jobID[index+len(JobLaunchSuffix):]) + if err != nil { + return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) + } + + return time.Unix(int64(launch), 0), nil +} + // Flush clears the state of the PeriodicDispatcher func (p *PeriodicDispatch) Flush() { p.l.Lock() diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index f199038f3..f6382d908 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -63,6 +63,19 @@ func (m *MockPeriodic) Tracked() []structs.Job { return tracked } +func testPeriodicJob(times ...time.Time) *structs.Job { + job := mock.PeriodicJob() + job.Periodic.SpecType = structs.PeriodicSpecTest + + l := make([]string, len(times)) + for i, t := range times { + l[i] = strconv.Itoa(int(t.Unix())) + } + + job.Periodic.Spec = strings.Join(l, ",") + return job +} + func TestPeriodicDispatch_DisabledOperations(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue @@ -171,7 +184,7 @@ func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { t.Fatalf("Unexpected number of evals created; got %#v; want 1", evals) } - eval := evals[0] + eval := evals[0].Eval expID := s1.periodicDispatcher.derivedJobID(job, expected) if eval.JobID != expID { t.Fatalf("periodic dispatcher created eval at the wrong time; got %v; want %v", @@ -331,8 +344,8 @@ func TestPeriodicDispatch_Run_Multiple(t *testing.T) { d := s1.periodicDispatcher expected := []string{d.derivedJobID(job, launch1), d.derivedJobID(job, launch2)} for i, eval := range evals { - if eval.JobID != expected[i] { - t.Fatalf("eval created incorrectly; got %v; want %v", eval.JobID, expected[i]) + if eval.Eval.JobID != expected[i] { + t.Fatalf("eval created incorrectly; got %v; want %v", eval.Eval.JobID, expected[i]) } } } @@ -375,8 +388,8 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { d := s1.periodicDispatcher expected := d.derivedJobID(job, launch) - if evals[0].JobID != expected { - t.Fatalf("eval created incorrectly; got %v; want %v", evals[0].JobID, expected) + if evals[0].Eval.JobID != expected { + t.Fatalf("eval created incorrectly; got %v; want %v", evals[0].Eval.JobID, expected) } } } @@ -464,7 +477,7 @@ func TestPeriodicDispatch_Complex(t *testing.T) { var jobs []string for _, eval := range evals { - jobs = append(jobs, eval.JobID) + jobs = append(jobs, eval.Eval.JobID) } actual[job.ID] = jobs } @@ -482,17 +495,36 @@ func shuffle(jobs []*structs.Job) { } } -func testPeriodicJob(times ...time.Time) *structs.Job { - job := mock.PeriodicJob() - job.Periodic.SpecType = structs.PeriodicSpecTest +func TestPeriodicDispatch_CreatedEvals(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - l := make([]string, len(times)) - for i, t := range times { - l[i] = strconv.Itoa(int(t.Unix())) + // Create three evals. 
+ job := mock.PeriodicJob() + now := time.Now().Round(time.Second) + times := []time.Time{now.Add(1 * time.Second), now.Add(2 * time.Second), now} + for _, time := range times { + if err := s1.periodicDispatcher.createEval(job, time); err != nil { + t.Fatalf("createEval() failed: %v", err) + } + } + + // Get the created evals. + created, err := s1.periodicDispatcher.CreatedEvals(job.ID) + if err != nil { + t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) + } + expected := []time.Time{times[2], times[0], times[1]} + for i, c := range created { + if c.JobLaunch != expected[i] { + t.Fatalf("CreatedEvals are in wrong order; got %v; want %v at index %d", + c.JobLaunch, expected[i], i) + } } - job.Periodic.Spec = strings.Join(l, ",") - return job } // TODO: Check that it doesn't create evals for overlapping things. diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index db1fd8c79..7834fb610 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -863,9 +863,15 @@ func (j *Job) Validate() error { } // Validate periodic is only used with batch jobs. - if j.Periodic != nil && j.Periodic.Enabled && j.Type != JobTypeBatch { - mErr.Errors = append(mErr.Errors, - fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) + if j.IsPeriodic() { + if j.Type != JobTypeBatch { + mErr.Errors = append(mErr.Errors, + fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) + } + + if err := j.Periodic.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } } return mErr.ErrorOrNil() diff --git a/nomad/timetable.go b/nomad/timetable.go index 38344f79a..36076ce4a 100644 --- a/nomad/timetable.go +++ b/nomad/timetable.go @@ -64,7 +64,7 @@ func (t *TimeTable) Deserialize(dec *codec.Decoder) error { return nil } -// Witness is used to witness a new inde and time. +// Witness is used to witness a new index and time. func (t *TimeTable) Witness(index uint64, when time.Time) { t.l.Lock() defer t.l.Unlock() From 610cfe4b345b88d850cf50afcd3494fa698870e2 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 4 Dec 2015 16:53:36 -0800 Subject: [PATCH 19/45] Small fixes and test fixes --- nomad/job_endpoint_test.go | 12 +++------- nomad/leader_test.go | 14 +++++++---- nomad/mock/mock.go | 3 ++- nomad/periodic.go | 48 +++++++++++++++++++++++++------------- nomad/periodic_test.go | 34 +++++++++++---------------- 5 files changed, 61 insertions(+), 50 deletions(-) diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index d1dc0948a..b9bb9a80e 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -242,9 +242,7 @@ func TestJobEndpoint_Register_Periodic(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request for a periodic job. 
- job := mock.Job() - job.Type = structs.JobTypeBatch - job.Periodic = &structs.PeriodicConfig{Enabled: true} + job := mock.PeriodicJob() req := &structs.JobRegisterRequest{ Job: job, WriteRequest: structs.WriteRequest{Region: "global"}, @@ -362,9 +360,7 @@ func TestJobEndpoint_Evaluate_Periodic(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request - job := mock.Job() - job.Type = structs.JobTypeBatch - job.Periodic = &structs.PeriodicConfig{Enabled: true} + job := mock.PeriodicJob() req := &structs.JobRegisterRequest{ Job: job, WriteRequest: structs.WriteRequest{Region: "global"}, @@ -476,9 +472,7 @@ func TestJobEndpoint_Deregister_Periodic(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request - job := mock.Job() - job.Type = structs.JobTypeBatch - job.Periodic = &structs.PeriodicConfig{Enabled: true} + job := mock.PeriodicJob() reg := &structs.JobRegisterRequest{ Job: job, WriteRequest: structs.WriteRequest{Region: "global"}, diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 13509d66c..186f5d48e 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -372,6 +372,7 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { c.NumSchedulers = 0 }) defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) // Inject a periodic job that will be triggered soon. launch := time.Now().Add(1 * time.Second) @@ -385,12 +386,14 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { } // Flush the periodic dispatcher, ensuring that no evals will be created. - s1.periodicDispatcher.Flush() + s1.periodicDispatcher.SetEnabled(false) // Sleep till after the job should have been launched. - time.Sleep(2 * time.Second) + time.Sleep(3 * time.Second) // Restore the periodic dispatcher. + s1.periodicDispatcher.SetEnabled(true) + s1.periodicDispatcher.Start() s1.restorePeriodicDispatcher() // Ensure the job is tracked. @@ -414,6 +417,7 @@ func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { c.NumSchedulers = 0 }) defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) // Inject a periodic job that triggered once in the past, should trigger now // and once in the future. @@ -433,12 +437,14 @@ func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { s1.periodicDispatcher.createEval(job, past) // Flush the periodic dispatcher, ensuring that no evals will be created. - s1.periodicDispatcher.Flush() + s1.periodicDispatcher.SetEnabled(false) // Sleep till after the job should have been launched. - time.Sleep(2 * time.Second) + time.Sleep(3 * time.Second) // Restore the periodic dispatcher. + s1.periodicDispatcher.SetEnabled(true) + s1.periodicDispatcher.Start() s1.restorePeriodicDispatcher() // Ensure the job is tracked. 
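The restore tests above exercise the leader-side wiring added in nomad/leader.go: on gaining leadership the dispatcher is enabled, started, and restored, and on losing leadership it is disabled, which stops its run loop and flushes its tracked state. A condensed, hedged sketch of that flow follows; the sketch method names are illustrative and error handling is elided.

// Condensed sketch of the leadership transition path exercised by the
// restore tests (illustrative names; see establishLeadership for the real
// implementation with full error handling).
func (s *Server) sketchGainLeadership() error {
	// Only the leader tracks periodic jobs, so enable and start the
	// dispatcher's run loop first.
	s.periodicDispatcher.SetEnabled(true)
	s.periodicDispatcher.Start()

	// Re-add every periodic job from state and force-run any job whose
	// next launch should already have happened while no leader was active.
	return s.restorePeriodicDispatcher()
}

func (s *Server) sketchLoseLeadership() {
	// Disabling stops the run loop and flushes the tracked jobs and heap.
	s.periodicDispatcher.SetEnabled(false)
}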
diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 0d61009a3..3408e0f97 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -192,10 +192,11 @@ func SystemJob() *structs.Job { func PeriodicJob() *structs.Job { job := Job() + job.Type = structs.JobTypeBatch job.Periodic = &structs.PeriodicConfig{ Enabled: true, SpecType: structs.PeriodicSpecCron, - Spec: "*/30 * * *", + Spec: "*/30 * * * *", } return job } diff --git a/nomad/periodic.go b/nomad/periodic.go index e4e2bfb02..c4f062c26 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -45,6 +45,7 @@ type PeriodicDispatch struct { updateCh chan struct{} stopCh chan struct{} + waitCh chan struct{} logger *log.Logger l sync.RWMutex } @@ -57,7 +58,8 @@ func NewPeriodicDispatch(srv *Server) *PeriodicDispatch { tracked: make(map[string]*structs.Job), heap: NewPeriodicHeap(), updateCh: make(chan struct{}, 1), - stopCh: make(chan struct{}, 1), + stopCh: make(chan struct{}), + waitCh: make(chan struct{}), logger: srv.logger, } } @@ -70,7 +72,8 @@ func (p *PeriodicDispatch) SetEnabled(enabled bool) { p.enabled = enabled p.l.Unlock() if !enabled { - p.stopCh <- struct{}{} + close(p.stopCh) + <-p.waitCh p.Flush() } } @@ -196,6 +199,8 @@ func (p *PeriodicDispatch) ForceRun(jobID string) error { // run is a long-lived function that waits til a job's periodic spec is met and // then creates an evaluation to run the job. func (p *PeriodicDispatch) run() { + defer close(p.waitCh) + // Do nothing if not enabled. p.l.RLock() if !p.enabled { @@ -212,15 +217,24 @@ PICK: if p.heap.Length() == 0 { p.l.RUnlock() p.logger.Printf("[DEBUG] nomad.periodic: no periodic jobs; waiting") - <-p.updateCh + select { + case <-p.stopCh: + return + case <-p.updateCh: + } p.l.RLock() } nextJob, err := p.heap.Peek() p.l.RUnlock() if err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) - return + select { + case <-p.stopCh: + return + default: + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) + return + } } launchTime := nextJob.next @@ -228,8 +242,12 @@ PICK: // If there are only invalid times, wait for an update. if launchTime.IsZero() { p.logger.Printf("[DEBUG] nomad.periodic: job %q has no valid launch time", nextJob.job.ID) - <-p.updateCh - goto PICK + select { + case <-p.stopCh: + return + case <-p.updateCh: + goto PICK + } } now = time.Now() @@ -426,8 +444,9 @@ func (p *PeriodicDispatch) evalLaunchTime(created *structs.Evaluation) (time.Tim func (p *PeriodicDispatch) Flush() { p.l.Lock() defer p.l.Unlock() - p.stopCh = make(chan struct{}, 1) + p.stopCh = make(chan struct{}) p.updateCh = make(chan struct{}, 1) + p.waitCh = make(chan struct{}) p.tracked = make(map[string]*structs.Job) p.heap = NewPeriodicHeap() } @@ -486,8 +505,11 @@ func (p *periodicHeap) Contains(job *structs.Job) bool { } func (p *periodicHeap) Update(job *structs.Job, next time.Time) error { - if job, ok := p.index[job.ID]; ok { - p.heap.update(job, next) + if pJob, ok := p.index[job.ID]; ok { + // Need to update the job as well because its spec can change. + pJob.job = job + pJob.next = next + heap.Fix(&p.heap, pJob.index) return nil } @@ -550,9 +572,3 @@ func (h *periodicHeapImp) Pop() interface{} { *h = old[0 : n-1] return job } - -// update modifies the priority and next time of an periodic job in the queue. 
-func (h *periodicHeapImp) update(job *periodicJob, next time.Time) { - job.next = next - heap.Fix(h, job.index) -} diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index f6382d908..abe05b088 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -77,6 +77,7 @@ func testPeriodicJob(times ...time.Time) *structs.Job { } func TestPeriodicDispatch_DisabledOperations(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -97,6 +98,7 @@ func TestPeriodicDispatch_DisabledOperations(t *testing.T) { } func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -115,6 +117,7 @@ func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { } func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -148,15 +151,13 @@ func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { } func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. - s1.periodicDispatcher.Start() - // Create a job that won't be evalauted for a while. job := testPeriodicJob(time.Now().Add(10 * time.Second)) @@ -193,6 +194,7 @@ func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { } func TestPeriodicDispatch_Remove_Untracked(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -205,6 +207,7 @@ func TestPeriodicDispatch_Remove_Untracked(t *testing.T) { } func TestPeriodicDispatch_Remove_Tracked(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -232,15 +235,13 @@ func TestPeriodicDispatch_Remove_Tracked(t *testing.T) { } func TestPeriodicDispatch_Remove_TriggersUpdate(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. - s1.periodicDispatcher.Start() - // Create a job that will be evaluated soon. job := testPeriodicJob(time.Now().Add(1 * time.Second)) @@ -268,6 +269,7 @@ func TestPeriodicDispatch_Remove_TriggersUpdate(t *testing.T) { } func TestPeriodicDispatch_ForceRun_Untracked(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -280,15 +282,13 @@ func TestPeriodicDispatch_ForceRun_Untracked(t *testing.T) { } func TestPeriodicDispatch_ForceRun_Tracked(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. - s1.periodicDispatcher.Start() - // Create a job that won't be evalauted for a while. job := testPeriodicJob(time.Now().Add(10 * time.Second)) @@ -314,15 +314,13 @@ func TestPeriodicDispatch_ForceRun_Tracked(t *testing.T) { } func TestPeriodicDispatch_Run_Multiple(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. 
- s1.periodicDispatcher.Start() - // Create a job that will be launched twice. launch1 := time.Now().Add(1 * time.Second) launch2 := time.Now().Add(2 * time.Second) @@ -351,15 +349,13 @@ func TestPeriodicDispatch_Run_Multiple(t *testing.T) { } func TestPeriodicDispatch_Run_SameTime(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. - s1.periodicDispatcher.Start() - // Create two job that will be launched at the same time. launch := time.Now().Add(1 * time.Second) job := testPeriodicJob(launch) @@ -398,15 +394,13 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { // some after each other and some invalid times, and ensures the correct // behavior. func TestPeriodicDispatch_Complex(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) - // Start the periodic dispatcher. - s1.periodicDispatcher.Start() - // Create some jobs launching at different times. now := time.Now() same := now.Add(1 * time.Second) @@ -496,6 +490,7 @@ func shuffle(jobs []*structs.Job) { } func TestPeriodicDispatch_CreatedEvals(t *testing.T) { + t.Parallel() s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue }) @@ -527,9 +522,8 @@ func TestPeriodicDispatch_CreatedEvals(t *testing.T) { } -// TODO: Check that it doesn't create evals for overlapping things. - func TestPeriodicHeap_Order(t *testing.T) { + t.Parallel() h := NewPeriodicHeap() j1 := mock.PeriodicJob() j2 := mock.PeriodicJob() From ea799b88cbb609afdf6eae7ebd1638b5a427fe20 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 18 Dec 2015 17:51:30 -0800 Subject: [PATCH 20/45] merge --- jobspec/parse.go | 4 +- jobspec/test-fixtures/periodic-cron.hcl | 2 +- nomad/fsm.go | 54 +++++++++- nomad/fsm_test.go | 29 ++++-- nomad/leader.go | 32 +----- nomad/periodic.go | 24 +++-- nomad/periodic_test.go | 25 +---- nomad/state/schema.go | 23 +++++ nomad/state/state_store.go | 74 ++++++++++++++ nomad/state/state_store_test.go | 117 ++++++++++++++++++++++ nomad/structs/structs.go | 6 ++ website/source/docs/jobspec/index.html.md | 18 ++++ 12 files changed, 336 insertions(+), 72 deletions(-) diff --git a/jobspec/parse.go b/jobspec/parse.go index 963c0b3e4..3c61fdd3f 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -698,8 +698,8 @@ func parsePeriodic(result **structs.PeriodicConfig, list *ast.ObjectList) error m["Enabled"] = enabled } - // If "cron_spec" is provided, set the type to "cron" and store the spec. - if cron, ok := m["cron_spec"]; ok { + // If "cron" is provided, set the type to "cron" and store the spec. + if cron, ok := m["cron"]; ok { m["SpecType"] = structs.PeriodicSpecCron m["Spec"] = cron } diff --git a/jobspec/test-fixtures/periodic-cron.hcl b/jobspec/test-fixtures/periodic-cron.hcl index 2b1bd2b39..c463cc4f1 100644 --- a/jobspec/test-fixtures/periodic-cron.hcl +++ b/jobspec/test-fixtures/periodic-cron.hcl @@ -1,5 +1,5 @@ job "foo" { periodic { - cron_spec = "*/5 * * *" + cron = "*/5 * * *" } } diff --git a/nomad/fsm.go b/nomad/fsm.go index ca81f0a77..af1799404 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -207,11 +207,46 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { return err } + // If it is periodic, insert it into the periodic runner and record the + // time it was inserted. 
if req.Job.IsPeriodic() { if err := n.periodicRunner.Add(req.Job); err != nil { n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Add failed: %v", err) return err } + + // Record the insertion time as a launch. + launch := &structs.PeriodicLaunch{req.Job.ID, time.Now()} + if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) + return err + } + } + + // Check if the parent job is periodic and mark the launch time. + parentID := req.Job.ParentID + if parentID != "" { + parent, err := n.state.JobByID(parentID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) + return err + } else if parent == nil { + // The parent has been deregistered. + return nil + } + + if parent.IsPeriodic() { + t, err := n.periodicRunner.LaunchTime(req.Job.ID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) + return err + } + launch := &structs.PeriodicLaunch{parentID, t} + if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) + return err + } + } } return nil @@ -224,14 +259,27 @@ func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } + job, err := n.state.JobByID(req.JobID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) + return err + } + if err := n.state.DeleteJob(index, req.JobID); err != nil { n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) return err } - if err := n.periodicRunner.Remove(req.JobID); err != nil { - n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Remove failed: %v", err) - return err + if job.IsPeriodic() { + if err := n.periodicRunner.Remove(req.JobID); err != nil { + n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Remove failed: %v", err) + return err + } + + if err := n.state.DeletePeriodicLaunch(index, req.JobID); err != nil { + n.logger.Printf("[ERR] nomad.fsm: DeletePeriodicLaunch failed: %v", err) + return err + } } return nil diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index eaba36917..d89ee67f8 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -222,9 +222,7 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { func TestFSM_RegisterJob(t *testing.T) { fsm := testFSM(t) - job := mock.Job() - job.Type = structs.JobTypeBatch - job.Periodic = &structs.PeriodicConfig{Enabled: true} + job := mock.PeriodicJob() req := structs.JobRegisterRequest{ Job: job, } @@ -254,14 +252,24 @@ func TestFSM_RegisterJob(t *testing.T) { if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; !ok { t.Fatal("job not added to periodic runner") } + + // Verify the launch time was tracked. 
+ launchOut, err := fsm.State().PeriodicLaunchByID(req.Job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if launchOut == nil { + t.Fatalf("not found!") + } + if launchOut.Launch.IsZero() { + t.Fatalf("bad launch time: %v", launchOut.Launch) + } } func TestFSM_DeregisterJob(t *testing.T) { fsm := testFSM(t) - job := mock.Job() - job.Type = structs.JobTypeBatch - job.Periodic = &structs.PeriodicConfig{Enabled: true} + job := mock.PeriodicJob() req := structs.JobRegisterRequest{ Job: job, } @@ -301,6 +309,15 @@ func TestFSM_DeregisterJob(t *testing.T) { if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; ok { t.Fatal("job not removed from periodic runner") } + + // Verify it was removed from the periodic launch table. + launchOut, err := fsm.State().PeriodicLaunchByID(req.Job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if launchOut != nil { + t.Fatalf("launch found!") + } } func TestFSM_UpdateEval(t *testing.T) { diff --git a/nomad/leader.go b/nomad/leader.go index 948ca219f..f1ec1a3b3 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -188,43 +188,19 @@ func (s *Server) restorePeriodicDispatcher() error { } now := time.Now() - var last time.Time for i := iter.Next(); i != nil; i = iter.Next() { job := i.(*structs.Job) s.periodicDispatcher.Add(job) - // Need to force run the job if an evaluation should have been created - // during the leader election period. At a high-level the logic to - // determine whether to force run a job is split based on whether an - // eval has been created for it. If so we check that since the last - // eval, should there have been a launch. If there is no eval, we check - // if there should have been a launch since the job was inserted. - evals, err := s.periodicDispatcher.CreatedEvals(job.ID) - if err != nil { - return fmt.Errorf("failed to get the evals created for periodic job %v: %v", - job.ID, err) + launch, err := s.fsm.State().PeriodicLaunchByID(job.ID) + if err != nil || launch == nil { + return fmt.Errorf("failed to get periodic launch time: %v", err) } - // Determine if we need to force run by checking if a run should have - // occured since the last eval - if l := len(evals); l != 0 { - last = evals[l-1].JobLaunch - if !job.Periodic.Next(last).Before(now) { - continue - } - - goto FORCE - } - - // Determine if we need to force run by checking if a run should have - // occured since the job was added. - last = s.fsm.TimeTable().NearestTime(job.ModifyIndex) - // TODO(alex): Think about the 0 time case - if !job.Periodic.Next(last).Before(now) { + if !job.Periodic.Next(launch.Launch).Before(now) { continue } - FORCE: if err := s.periodicDispatcher.ForceRun(job.ID); err != nil { s.logger.Printf( "[ERR] nomad.periodic: force run of periodic job %q failed: %v", diff --git a/nomad/periodic.go b/nomad/periodic.go index c4f062c26..f0cee38ba 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -17,7 +17,7 @@ import ( const ( // The string appended to the periodic jobs ID when launching derived // instances of it. - JobLaunchSuffix = "-launch-" + JobLaunchSuffix = "/periodic-" ) // PeriodicRunner is the interface for tracking and launching periodic jobs at @@ -30,6 +30,7 @@ type PeriodicRunner interface { ForceRun(jobID string) error Tracked() []structs.Job Flush() + LaunchTime(jobID string) (time.Time, error) } // PeriodicDispatch is used to track and launch periodic jobs. 
It maintains the @@ -72,8 +73,10 @@ func (p *PeriodicDispatch) SetEnabled(enabled bool) { p.enabled = enabled p.l.Unlock() if !enabled { - close(p.stopCh) - <-p.waitCh + if p.running { + close(p.stopCh) + <-p.waitCh + } p.Flush() } } @@ -107,7 +110,7 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { // Do nothing if not enabled if !p.enabled { - return fmt.Errorf("periodic dispatch disabled") + return nil } // If we were tracking a job and it has been disabled or made non-periodic remove it. @@ -156,7 +159,7 @@ func (p *PeriodicDispatch) Remove(jobID string) error { // Do nothing if not enabled if !p.enabled { - return fmt.Errorf("periodic dispatch disabled") + return nil } if job, tracked := p.tracked[jobID]; tracked { @@ -374,6 +377,7 @@ func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time // CreatedEvals returns the set of evaluations created from the passed periodic // job in sorted order, with the earliest job launch first. +// TODO: Get rid of this func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) (PeriodicEvals, error) { state := p.srv.fsm.State() iter, err := state.ChildJobs(periodicJobID) @@ -390,7 +394,7 @@ func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) (PeriodicEvals, er } for _, eval := range childEvals { - launch, err := p.evalLaunchTime(eval) + launch, err := p.LaunchTime(eval.JobID) if err != nil { return nil, fmt.Errorf("failed to get launch time for eval %v: %v", eval, err) } @@ -422,11 +426,9 @@ func (p PeriodicEvals) Len() int { return len(p) } func (p PeriodicEvals) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p PeriodicEvals) Less(i, j int) bool { return p[i].JobLaunch.Before(p[j].JobLaunch) } -// evalLaunchTime returns the launch time of the job associated with the eval. -// This is only valid for evaluations created by PeriodicDispatch and will -// otherwise return an error. -func (p *PeriodicDispatch) evalLaunchTime(created *structs.Evaluation) (time.Time, error) { - jobID := created.JobID +// LaunchTime returns the launch time of the job. This is only valid for +// jobs created by PeriodicDispatch and will otherwise return an error. +func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { index := strings.LastIndex(jobID, JobLaunchSuffix) if index == -1 { return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index abe05b088..db6594634 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -47,6 +47,10 @@ func (m *MockPeriodic) ForceRun(jobID string) error { return nil } +func (m *MockPeriodic) LaunchTime(jobID string) (time.Time, error) { + return time.Time{}, nil +} + func (m *MockPeriodic) Start() {} func (m *MockPeriodic) Flush() { @@ -76,27 +80,6 @@ func testPeriodicJob(times ...time.Time) *structs.Job { return job } -func TestPeriodicDispatch_DisabledOperations(t *testing.T) { - t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) - - // Disable the dispatcher. 
- s1.periodicDispatcher.SetEnabled(false) - - job := mock.PeriodicJob() - if err := s1.periodicDispatcher.Add(job); err == nil { - t.Fatalf("Add on disabled dispatcher should fail") - } - - if err := s1.periodicDispatcher.Remove(job.ID); err == nil { - t.Fatalf("Remove on disabled dispatcher should fail") - } -} - func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { t.Parallel() s1 := testServer(t, func(c *Config) { diff --git a/nomad/state/schema.go b/nomad/state/schema.go index d3119016b..0ff2ad58a 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -19,6 +19,7 @@ func stateStoreSchema() *memdb.DBSchema { indexTableSchema, nodeTableSchema, jobTableSchema, + periodicLaunchTableSchema, evalTableSchema, allocTableSchema, } @@ -141,6 +142,28 @@ func jobIsGCable(obj interface{}) (bool, error) { return j.GC, nil } +// periodicLaunchTableSchema returns the MemDB schema tracking the most recent +// launch time for a perioidic job. +func periodicLaunchTableSchema() *memdb.TableSchema { + return &memdb.TableSchema{ + Name: "periodic_launch", + Indexes: map[string]*memdb.IndexSchema{ + // Primary index is used for job management + // and simple direct lookup. ID is required to be + // unique. + "id": &memdb.IndexSchema{ + Name: "id", + AllowMissing: false, + Unique: true, + Indexer: &memdb.StringFieldIndex{ + Field: "ID", + Lowercase: true, + }, + }, + }, + } +} + // evalTableSchema returns the MemDB schema for the eval table. // This table is used to store all the evaluations that are pending // or recently completed. diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index f0953d403..103c08a8b 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -408,6 +408,80 @@ func (s *StateStore) JobsByGC(gc bool) (memdb.ResultIterator, error) { return iter, nil } +// UpsertPeriodicLaunch is used to register a launch or update it. 
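+// The launch is stored in the periodic_launch table keyed by the periodic
+// job's ID; the table index is bumped to the given raft index and watchers on
+// both the table and the job are notified.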
+func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { + txn := s.db.Txn(true) + defer txn.Abort() + + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "periodic_launch"}) + watcher.Add(watch.Item{Job: launch.ID}) + + // Check if the job already exists + if _, err := txn.First("periodic_launch", "id", launch.ID); err != nil { + return fmt.Errorf("periodic launch lookup failed: %v", err) + } + + // Insert the job + if err := txn.Insert("periodic_launch", launch); err != nil { + return fmt.Errorf("launch insert failed: %v", err) + } + if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { + return fmt.Errorf("index update failed: %v", err) + } + + txn.Defer(func() { s.watch.notify(watcher) }) + txn.Commit() + return nil +} + +// DeletePeriodicLaunch is used to delete the periodic launch +func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { + txn := s.db.Txn(true) + defer txn.Abort() + + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "periodic_launch"}) + watcher.Add(watch.Item{Job: jobID}) + + // Lookup the launch + existing, err := txn.First("periodic_launch", "id", jobID) + if err != nil { + return fmt.Errorf("launch lookup failed: %v", err) + } + if existing == nil { + return fmt.Errorf("launch not found") + } + + // Delete the launch + if err := txn.Delete("periodic_launch", existing); err != nil { + return fmt.Errorf("launch delete failed: %v", err) + } + if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { + return fmt.Errorf("index update failed: %v", err) + } + + txn.Defer(func() { s.watch.notify(watcher) }) + txn.Commit() + return nil +} + +// PeriodicLaunchByID is used to lookup a periodic launch by the periodic job +// ID. 
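+// A nil launch and a nil error are returned when no launch has been recorded
+// for the given job ID.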
+func (s *StateStore) PeriodicLaunchByID(id string) (*structs.PeriodicLaunch, error) { + txn := s.db.Txn(false) + + existing, err := txn.First("periodic_launch", "id", id) + if err != nil { + return nil, fmt.Errorf("periodic launch lookup failed: %v", err) + } + + if existing != nil { + return existing.(*structs.PeriodicLaunch), nil + } + return nil, nil +} + // UpsertEvaluation is used to upsert an evaluation func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { txn := s.db.Txn(true) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index a1504e936..5e27f0112 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -5,6 +5,7 @@ import ( "reflect" "sort" "testing" + "time" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -668,6 +669,122 @@ func TestStateStore_RestoreJob(t *testing.T) { notify.verify(t) } +func TestStateStore_UpsertPeriodicLaunch(t *testing.T) { + state := testStateStore(t) + job := mock.Job() + launch := &structs.PeriodicLaunch{job.ID, time.Now()} + + notify := setupNotifyTest( + state, + watch.Item{Table: "periodic_launch"}, + watch.Item{Job: job.ID}) + + err := state.UpsertPeriodicLaunch(1000, launch) + if err != nil { + t.Fatalf("err: %v", err) + } + + out, err := state.PeriodicLaunchByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(launch, out) { + t.Fatalf("bad: %#v %#v", job, out) + } + + index, err := state.Index("periodic_launch") + if err != nil { + t.Fatalf("err: %v", err) + } + if index != 1000 { + t.Fatalf("bad: %d", index) + } + + notify.verify(t) +} + +func TestStateStore_UpdateUpsertPeriodicLaunch(t *testing.T) { + state := testStateStore(t) + job := mock.Job() + launch := &structs.PeriodicLaunch{job.ID, time.Now()} + + notify := setupNotifyTest( + state, + watch.Item{Table: "periodic_launch"}, + watch.Item{Job: job.ID}) + + err := state.UpsertPeriodicLaunch(1000, launch) + if err != nil { + t.Fatalf("err: %v", err) + } + + launch2 := &structs.PeriodicLaunch{job.ID, launch.Launch.Add(1 * time.Second)} + err = state.UpsertPeriodicLaunch(1001, launch2) + if err != nil { + t.Fatalf("err: %v", err) + } + + out, err := state.PeriodicLaunchByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(launch2, out) { + t.Fatalf("bad: %#v %#v", launch2, out) + } + + index, err := state.Index("periodic_launch") + if err != nil { + t.Fatalf("err: %v", err) + } + if index != 1001 { + t.Fatalf("bad: %d", index) + } + + notify.verify(t) +} + +func TestStateStore_DeletePeriodicLaunch(t *testing.T) { + state := testStateStore(t) + job := mock.Job() + launch := &structs.PeriodicLaunch{job.ID, time.Now()} + + notify := setupNotifyTest( + state, + watch.Item{Table: "periodic_launch"}, + watch.Item{Job: job.ID}) + + err := state.UpsertPeriodicLaunch(1000, launch) + if err != nil { + t.Fatalf("err: %v", err) + } + + err = state.DeletePeriodicLaunch(1001, job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + out, err := state.PeriodicLaunchByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + if out != nil { + t.Fatalf("bad: %#v %#v", job, out) + } + + index, err := state.Index("periodic_launch") + if err != nil { + t.Fatalf("err: %v", err) + } + if index != 1001 { + t.Fatalf("bad: %d", index) + } + + notify.verify(t) +} + func TestStateStore_Indexes(t *testing.T) { state := testStateStore(t) node := mock.Node() diff --git a/nomad/structs/structs.go 
b/nomad/structs/structs.go index 7834fb610..a33befbd3 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1017,6 +1017,12 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { return time.Time{} } +// PeriodicLaunch tracks the last launch time of a periodic job. +type PeriodicLaunch struct { + ID string // ID of the periodic job. + Launch time.Time // The last launch time. +} + var ( defaultServiceJobRestartPolicy = RestartPolicy{ Delay: 15 * time.Second, diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index f55251c2b..f3c2a4435 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -156,6 +156,24 @@ The `job` object supports the following keys: and "h" suffix can be used, such as "30s". Both values default to zero, which disables rolling updates. +* `periodic` - `periodic` allows the job to be scheduled at fixed times, dates + or intervals. The `periodic` block has the following configuration: + + ``` + periodic { + // Enabled is defaulted to true if the block is included. It can be set + // to false to pause the periodic job from running. + enabled = true + + // A cron expression configuring the interval the job is launched at. + // Supports predefined expressions such as "@daily" and "@weekly" + cron = "*/15 * * * * *" + } + ``` + + `cron`: See [here](https://github.com/gorhill/cronexpr#implementation) + for full documentation of supported cron specs and the predefined expressions. + ### Task Group The `group` object supports the following keys: From ca65daf4c0bb3eedb05007d34a29bab31b6eb074 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 7 Dec 2015 14:24:06 -0800 Subject: [PATCH 21/45] move created evals to the test package --- nomad/leader_test.go | 8 +-- nomad/periodic.go | 54 +------------------- nomad/periodic_test.go | 109 ++++++++++++++++++++++++----------------- 3 files changed, 69 insertions(+), 102 deletions(-) diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 186f5d48e..4c035efcd 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -402,9 +402,9 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { } // Check that an eval was made. - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed: %v", job.ID, err) } if len(evals) != 1 { @@ -453,9 +453,9 @@ func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { } // Check that an eval was made. - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed: %v", job.ID, err) } if len(evals) != 2 { diff --git a/nomad/periodic.go b/nomad/periodic.go index f0cee38ba..2384dde09 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "log" - "sort" "strconv" "strings" "sync" @@ -375,57 +374,6 @@ func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.Unix()) } -// CreatedEvals returns the set of evaluations created from the passed periodic -// job in sorted order, with the earliest job launch first. 
-// TODO: Get rid of this -func (p *PeriodicDispatch) CreatedEvals(periodicJobID string) (PeriodicEvals, error) { - state := p.srv.fsm.State() - iter, err := state.ChildJobs(periodicJobID) - if err != nil { - return nil, fmt.Errorf("failed to look up children of job %v: %v", periodicJobID, err) - } - - var evals PeriodicEvals - for i := iter.Next(); i != nil; i = iter.Next() { - job := i.(*structs.Job) - childEvals, err := state.EvalsByJob(job.ID) - if err != nil { - return nil, fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) - } - - for _, eval := range childEvals { - launch, err := p.LaunchTime(eval.JobID) - if err != nil { - return nil, fmt.Errorf("failed to get launch time for eval %v: %v", eval, err) - } - - pEval := &PeriodicEval{ - Eval: eval, - JobLaunch: launch, - } - - evals = append(evals, pEval) - } - } - - // Return the sorted evals. - sort.Sort(evals) - return evals, nil -} - -// PeriodicEval stores the evaluation and launch time for an instantiated -// periodic job. -type PeriodicEval struct { - Eval *structs.Evaluation - JobLaunch time.Time -} - -type PeriodicEvals []*PeriodicEval - -func (p PeriodicEvals) Len() int { return len(p) } -func (p PeriodicEvals) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p PeriodicEvals) Less(i, j int) bool { return p[i].JobLaunch.Before(p[j].JobLaunch) } - // LaunchTime returns the launch time of the job. This is only valid for // jobs created by PeriodicDispatch and will otherwise return an error. func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { @@ -453,7 +401,7 @@ func (p *PeriodicDispatch) Flush() { p.heap = NewPeriodicHeap() } -// TODO +// periodicHeap wraps a heap and gives operations other than Push/Pop. type periodicHeap struct { index map[string]*periodicJob heap periodicHeapImp diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index db6594634..5ca4ff6a4 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -4,6 +4,7 @@ import ( "fmt" "math/rand" "reflect" + "sort" "strconv" "strings" "testing" @@ -80,6 +81,57 @@ func testPeriodicJob(times ...time.Time) *structs.Job { return job } +// createdEvals returns the set of evaluations created from the passed periodic +// job in sorted order, with the earliest job launch first. +func createdEvals(p *PeriodicDispatch, periodicJobID string) (PeriodicEvals, error) { + state := p.srv.fsm.State() + iter, err := state.ChildJobs(periodicJobID) + if err != nil { + return nil, fmt.Errorf("failed to look up children of job %v: %v", periodicJobID, err) + } + + var evals PeriodicEvals + for i := iter.Next(); i != nil; i = iter.Next() { + job := i.(*structs.Job) + childEvals, err := state.EvalsByJob(job.ID) + if err != nil { + return nil, fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) + } + + for _, eval := range childEvals { + launch, err := p.LaunchTime(eval.JobID) + if err != nil { + return nil, fmt.Errorf("failed to get launch time for eval %v: %v", eval, err) + } + + pEval := &PeriodicEval{ + Eval: eval, + JobLaunch: launch, + } + + evals = append(evals, pEval) + } + } + + // Return the sorted evals. + sort.Sort(evals) + return evals, nil +} + +// PeriodicEval stores the evaluation and launch time for an instantiated +// periodic job. +type PeriodicEval struct { + Eval *structs.Evaluation + JobLaunch time.Time +} + +// For sorting. 
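+// PeriodicEvals implements sort.Interface, ordering evals by their job launch
+// time with the earliest launch first.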
+type PeriodicEvals []*PeriodicEval + +func (p PeriodicEvals) Len() int { return len(p) } +func (p PeriodicEvals) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p PeriodicEvals) Less(i, j int) bool { return p[i].JobLaunch.Before(p[j].JobLaunch) } + func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { t.Parallel() s1 := testServer(t, func(c *Config) { @@ -159,9 +211,9 @@ func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { time.Sleep(2 * time.Second) // Check that an eval was created for the right time. - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } if len(evals) != 1 { @@ -241,9 +293,9 @@ func TestPeriodicDispatch_Remove_TriggersUpdate(t *testing.T) { time.Sleep(2 * time.Second) // Check that an eval wasn't created. - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } if len(evals) != 0 { @@ -286,9 +338,9 @@ func TestPeriodicDispatch_ForceRun_Tracked(t *testing.T) { } // Check that an eval was created for the right time. - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } if len(evals) != 1 { @@ -317,9 +369,9 @@ func TestPeriodicDispatch_Run_Multiple(t *testing.T) { time.Sleep(3 * time.Second) // Check that the evals were created correctly - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } d := s1.periodicDispatcher @@ -356,9 +408,9 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { // Check that the evals were created correctly for _, job := range []*structs.Job{job, job2} { - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } if len(evals) != 1 { @@ -447,9 +499,9 @@ func TestPeriodicDispatch_Complex(t *testing.T) { time.Sleep(4 * time.Second) actual := make(map[string][]string, len(expected)) for _, job := range jobs { - evals, err := s1.periodicDispatcher.CreatedEvals(job.ID) + evals, err := createdEvals(s1.periodicDispatcher, job.ID) if err != nil { - t.Fatalf("CreatedEvals(%v) failed %v", job.ID, err) + t.Fatalf("createdEvals(%v) failed %v", job.ID, err) } var jobs []string @@ -472,39 +524,6 @@ func shuffle(jobs []*structs.Job) { } } -func TestPeriodicDispatch_CreatedEvals(t *testing.T) { - t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) - - // Create three evals. - job := mock.PeriodicJob() - now := time.Now().Round(time.Second) - times := []time.Time{now.Add(1 * time.Second), now.Add(2 * time.Second), now} - for _, time := range times { - if err := s1.periodicDispatcher.createEval(job, time); err != nil { - t.Fatalf("createEval() failed: %v", err) - } - } - - // Get the created evals. 
- created, err := s1.periodicDispatcher.CreatedEvals(job.ID) - if err != nil { - t.Fatalf("CreatedEvals(%v) failed: %v", job.ID, err) - } - expected := []time.Time{times[2], times[0], times[1]} - for i, c := range created { - if c.JobLaunch != expected[i] { - t.Fatalf("CreatedEvals are in wrong order; got %v; want %v at index %d", - c.JobLaunch, expected[i], i) - } - } - -} - func TestPeriodicHeap_Order(t *testing.T) { t.Parallel() h := NewPeriodicHeap() From 37c1f569213186c755968ff40178ff853e428abe Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 7 Dec 2015 15:58:17 -0800 Subject: [PATCH 22/45] Snapshot and restore --- nomad/fsm.go | 41 ++++++++++++++++ nomad/fsm_test.go | 24 ++++++++++ nomad/state/state_store.go | 22 +++++++++ nomad/state/state_store_test.go | 85 +++++++++++++++++++++++++++++++++ 4 files changed, 172 insertions(+) diff --git a/nomad/fsm.go b/nomad/fsm.go index af1799404..bfff13920 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -32,6 +32,7 @@ const ( EvalSnapshot AllocSnapshot TimeTableSnapshot + PeriodicLaunchSnapshot ) // nomadFSM implements a finite state machine that is used @@ -456,6 +457,15 @@ func (n *nomadFSM) Restore(old io.ReadCloser) error { return err } + case PeriodicLaunchSnapshot: + launch := new(structs.PeriodicLaunch) + if err := dec.Decode(launch); err != nil { + return err + } + if err := restore.PeriodicLaunchRestore(launch); err != nil { + return err + } + default: return fmt.Errorf("Unrecognized snapshot type: %v", msgType) } @@ -506,6 +516,10 @@ func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { sink.Cancel() return err } + if err := s.persistPeriodicLaunches(sink, encoder); err != nil { + sink.Cancel() + return err + } return nil } @@ -644,6 +658,33 @@ func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, return nil } +func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, + encoder *codec.Encoder) error { + // Get all the jobs + launches, err := s.snap.PeriodicLaunches() + if err != nil { + return err + } + + for { + // Get the next item + raw := launches.Next() + if raw == nil { + break + } + + // Prepare the request struct + launch := raw.(*structs.PeriodicLaunch) + + // Write out a job registration + sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) + if err := encoder.Encode(launch); err != nil { + return err + } + } + return nil +} + // Release is a no-op, as we just need to GC the pointer // to the state store snapshot. There is nothing to explicitly // cleanup. 
diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index d89ee67f8..332eaccc4 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -639,3 +639,27 @@ func TestFSM_SnapshotRestore_TimeTable(t *testing.T) { t.Fatalf("bad") } } + +func TestFSM_SnapshotRestore_PeriodicLaunches(t *testing.T) { + // Add some state + fsm := testFSM(t) + state := fsm.State() + job1 := mock.Job() + launch1 := &structs.PeriodicLaunch{job1.ID, time.Now()} + state.UpsertPeriodicLaunch(1000, launch1) + job2 := mock.Job() + launch2 := &structs.PeriodicLaunch{job2.ID, time.Now()} + state.UpsertPeriodicLaunch(1001, launch2) + + // Verify the contents + fsm2 := testSnapshotRestore(t, fsm) + state2 := fsm2.State() + out1, _ := state2.PeriodicLaunchByID(launch1.ID) + out2, _ := state2.PeriodicLaunchByID(launch2.ID) + if !reflect.DeepEqual(launch1, out1) { + t.Fatalf("bad: \n%#v\n%#v", out1, job1) + } + if !reflect.DeepEqual(launch2, out2) { + t.Fatalf("bad: \n%#v\n%#v", out2, job2) + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 103c08a8b..3169effdb 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -482,6 +482,18 @@ func (s *StateStore) PeriodicLaunchByID(id string) (*structs.PeriodicLaunch, err return nil, nil } +// PeriodicLaunches returns an iterator over all the periodic launches +func (s *StateStore) PeriodicLaunches() (memdb.ResultIterator, error) { + txn := s.db.Txn(false) + + // Walk the entire table + iter, err := txn.Get("periodic_launch", "id") + if err != nil { + return nil, err + } + return iter, nil +} + // UpsertEvaluation is used to upsert an evaluation func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { txn := s.db.Txn(true) @@ -925,6 +937,16 @@ func (r *StateRestore) IndexRestore(idx *IndexEntry) error { return nil } +// PeriodicLaunchRestore is used to restore a periodic launch. +func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { + r.items.Add(watch.Item{Table: "periodic_launch"}) + r.items.Add(watch.Item{Job: launch.ID}) + if err := r.txn.Insert("periodic_launch", launch); err != nil { + return fmt.Errorf("periodic launch insert failed: %v", err) + } + return nil +} + // stateWatch holds shared state for watching updates. This is // outside of StateStore so it can be shared with snapshots. 
type stateWatch struct { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 5e27f0112..18515481d 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -785,6 +785,91 @@ func TestStateStore_DeletePeriodicLaunch(t *testing.T) { notify.verify(t) } +func TestStateStore_PeriodicLaunches(t *testing.T) { + state := testStateStore(t) + var launches []*structs.PeriodicLaunch + + for i := 0; i < 10; i++ { + job := mock.Job() + launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launches = append(launches, launch) + + err := state.UpsertPeriodicLaunch(1000+uint64(i), launch) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + iter, err := state.PeriodicLaunches() + if err != nil { + t.Fatalf("err: %v", err) + } + + out := make(map[string]*structs.PeriodicLaunch, 10) + for { + raw := iter.Next() + if raw == nil { + break + } + launch := raw.(*structs.PeriodicLaunch) + if _, ok := out[launch.ID]; ok { + t.Fatalf("duplicate: %v", launch.ID) + } + + out[launch.ID] = launch + } + + for _, launch := range launches { + l, ok := out[launch.ID] + if !ok { + t.Fatalf("bad %v", launch.ID) + } + + if !reflect.DeepEqual(launch, l) { + t.Fatalf("bad: %#v %#v", launch, l) + } + + delete(out, launch.ID) + } + + if len(out) != 0 { + t.Fatalf("leftover: %#v", out) + } +} + +func TestStateStore_RestorePeriodicLaunch(t *testing.T) { + state := testStateStore(t) + job := mock.Job() + launch := &structs.PeriodicLaunch{job.ID, time.Now()} + + notify := setupNotifyTest( + state, + watch.Item{Table: "periodic_launch"}, + watch.Item{Job: job.ID}) + + restore, err := state.Restore() + if err != nil { + t.Fatalf("err: %v", err) + } + + err = restore.PeriodicLaunchRestore(launch) + if err != nil { + t.Fatalf("err: %v", err) + } + restore.Commit() + + out, err := state.PeriodicLaunchByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(out, launch) { + t.Fatalf("Bad: %#v %#v", out, job) + } + + notify.verify(t) +} + func TestStateStore_Indexes(t *testing.T) { state := testStateStore(t) node := mock.Node() From 7586a84dbe17494cdf8bdfd61f3b7fe74ea39320 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 8 Dec 2015 17:26:22 -0800 Subject: [PATCH 23/45] Update deregister test --- api/jobs_test.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 3d81204c0..51018e159 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -154,12 +154,10 @@ func TestJobs_Deregister(t *testing.T) { } assertWriteMeta(t, wm) - // Attempting delete on non-existing job does not error - _, wm2, err := jobs.Deregister("nope", nil) - if err != nil { - t.Fatalf("err: %s", err) + // Attempting delete on non-existing job returns an error + if _, _, err = jobs.Deregister("nope", nil); err == nil { + t.Fatalf("expected error deregistering job") } - assertWriteMeta(t, wm2) // Deleting an existing job works evalID, wm3, err := jobs.Deregister("job1", nil) From 642219ba5db804271fae969f822b1c565efc4be7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 9 Dec 2015 16:46:06 -0800 Subject: [PATCH 24/45] Race condition fixed --- nomad/periodic.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nomad/periodic.go b/nomad/periodic.go index 2384dde09..ac3647b3a 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -116,7 +116,7 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { disabled := !job.IsPeriodic() || !job.Periodic.Enabled _, tracked := p.tracked[job.ID] 
if tracked && disabled { - return p.Remove(job.ID) + return p.removeLocked(job.ID) } // If the job is diabled and we aren't tracking it, do nothing. @@ -155,7 +155,12 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { func (p *PeriodicDispatch) Remove(jobID string) error { p.l.Lock() defer p.l.Unlock() + return p.removeLocked(jobID) +} +// Remove stops tracking the passed job. If the job is not tracked, it is a +// no-op. It assumes this is called while a lock is held. +func (p *PeriodicDispatch) removeLocked(jobID string) error { // Do nothing if not enabled if !p.enabled { return nil From bd4eda0b0517963191890b4e53bab0d1299a811b Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 10 Dec 2015 14:07:34 -0800 Subject: [PATCH 25/45] Fix nomad run --- command/run.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/command/run.go b/command/run.go index d25692135..d50098d16 100644 --- a/command/run.go +++ b/command/run.go @@ -5,6 +5,7 @@ import ( "encoding/gob" "fmt" "strings" + "time" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/jobspec" @@ -89,6 +90,9 @@ func (c *RunCommand) Run(args []string) int { return 1 } + // Check if the job is periodic. + periodic := job.IsPeriodic() + // Convert it to something we can use apiJob, err := convertJob(job) if err != nil { @@ -111,9 +115,14 @@ func (c *RunCommand) Run(args []string) int { } // Check if we should enter monitor mode - if detach { + if detach || periodic { c.Ui.Output("Job registration successful") - c.Ui.Output("Evaluation ID: " + evalID) + if periodic { + c.Ui.Output(fmt.Sprintf("Approximate next launch time: %v", job.Periodic.Next(time.Now()))) + } else { + c.Ui.Output("Evaluation ID: " + evalID) + } + return 0 } From 0a8c49f36b1dd046877b64afb02ea111cdd0a4fc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 11 Dec 2015 15:15:28 -0800 Subject: [PATCH 26/45] Remove incorrect comment --- nomad/state/state_store.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 3169effdb..070bb8aad 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -375,7 +375,6 @@ func (s *StateStore) ChildJobs(id string) (memdb.ResultIterator, error) { func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { txn := s.db.Txn(false) - // Scan all jobs whose parent is the passed id. iter, err := txn.Get("jobs", "periodic", periodic) if err != nil { return nil, err From 49dd0dc4615a31faec207a9e141c61f9c36eb00a Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 13:46:09 -0800 Subject: [PATCH 27/45] fixes from review --- nomad/fsm.go | 4 ++-- nomad/fsm_test.go | 4 ++-- nomad/periodic.go | 19 +++++++++-------- nomad/periodic_test.go | 6 +++--- nomad/state/state_store.go | 36 +++++++++++++++++++++------------ nomad/state/state_store_test.go | 27 +++++++++++++++++++------ nomad/structs/structs.go | 6 +++++- 7 files changed, 65 insertions(+), 37 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index bfff13920..334957754 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -217,7 +217,7 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { } // Record the insertion time as a launch. 
- launch := &structs.PeriodicLaunch{req.Job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) return err @@ -242,7 +242,7 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) return err } - launch := &structs.PeriodicLaunch{parentID, t} + launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) return err diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 332eaccc4..c1bf4dfa5 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -645,10 +645,10 @@ func TestFSM_SnapshotRestore_PeriodicLaunches(t *testing.T) { fsm := testFSM(t) state := fsm.State() job1 := mock.Job() - launch1 := &structs.PeriodicLaunch{job1.ID, time.Now()} + launch1 := &structs.PeriodicLaunch{ID: job1.ID, Launch: time.Now()} state.UpsertPeriodicLaunch(1000, launch1) job2 := mock.Job() - launch2 := &structs.PeriodicLaunch{job2.ID, time.Now()} + launch2 := &structs.PeriodicLaunch{ID: job2.ID, Launch: time.Now()} state.UpsertPeriodicLaunch(1001, launch2) // Verify the contents diff --git a/nomad/periodic.go b/nomad/periodic.go index ac3647b3a..08f45b91f 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -27,7 +27,7 @@ type PeriodicRunner interface { Add(job *structs.Job) error Remove(jobID string) error ForceRun(jobID string) error - Tracked() []structs.Job + Tracked() []*structs.Job Flush() LaunchTime(jobID string) (time.Time, error) } @@ -89,13 +89,13 @@ func (p *PeriodicDispatch) Start() { } // Tracked returns the set of tracked job IDs. -func (p *PeriodicDispatch) Tracked() []structs.Job { +func (p *PeriodicDispatch) Tracked() []*structs.Job { p.l.RLock() defer p.l.RUnlock() - tracked := make([]structs.Job, len(p.tracked)) + tracked := make([]*structs.Job, len(p.tracked)) i := 0 for _, job := range p.tracked { - tracked[i] = *job + tracked[i] = job i++ } return tracked @@ -115,12 +115,12 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { // If we were tracking a job and it has been disabled or made non-periodic remove it. disabled := !job.IsPeriodic() || !job.Periodic.Enabled _, tracked := p.tracked[job.ID] - if tracked && disabled { - return p.removeLocked(job.ID) - } - - // If the job is diabled and we aren't tracking it, do nothing. if disabled { + if tracked { + p.removeLocked(job.ID) + } + + // If the job is diabled and we aren't tracking it, do nothing. 
return nil } @@ -223,7 +223,6 @@ PICK: p.l.RLock() if p.heap.Length() == 0 { p.l.RUnlock() - p.logger.Printf("[DEBUG] nomad.periodic: no periodic jobs; waiting") select { case <-p.stopCh: return diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index 5ca4ff6a4..47604a17f 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -58,11 +58,11 @@ func (m *MockPeriodic) Flush() { m.Jobs = make(map[string]*structs.Job) } -func (m *MockPeriodic) Tracked() []structs.Job { - tracked := make([]structs.Job, len(m.Jobs)) +func (m *MockPeriodic) Tracked() []*structs.Job { + tracked := make([]*structs.Job, len(m.Jobs)) i := 0 for _, job := range m.Jobs { - tracked[i] = *job + tracked[i] = job i++ } return tracked diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 070bb8aad..d73616696 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -131,10 +131,6 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { txn := s.db.Txn(true) defer txn.Abort() - watcher := watch.NewItems() - watcher.Add(watch.Item{Table: "nodes"}) - watcher.Add(watch.Item{Node: nodeID}) - // Lookup the node existing, err := txn.First("nodes", "id", nodeID) if err != nil { @@ -144,6 +140,10 @@ func (s *StateStore) DeleteNode(index uint64, nodeID string) error { return fmt.Errorf("node not found") } + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "nodes"}) + watcher.Add(watch.Item{Node: nodeID}) + // Delete the node if err := txn.Delete("nodes", existing); err != nil { return fmt.Errorf("node delete failed: %v", err) @@ -306,10 +306,6 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() - watcher := watch.NewItems() - watcher.Add(watch.Item{Table: "jobs"}) - watcher.Add(watch.Item{Job: jobID}) - // Lookup the node existing, err := txn.First("jobs", "id", jobID) if err != nil { @@ -319,6 +315,10 @@ func (s *StateStore) DeleteJob(index uint64, jobID string) error { return fmt.Errorf("job not found") } + watcher := watch.NewItems() + watcher.Add(watch.Item{Table: "jobs"}) + watcher.Add(watch.Item{Job: jobID}) + // Delete the node if err := txn.Delete("jobs", existing); err != nil { return fmt.Errorf("job delete failed: %v", err) @@ -417,10 +417,20 @@ func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.Periodic watcher.Add(watch.Item{Job: launch.ID}) // Check if the job already exists - if _, err := txn.First("periodic_launch", "id", launch.ID); err != nil { + existing, err := txn.First("periodic_launch", "id", launch.ID) + if err != nil { return fmt.Errorf("periodic launch lookup failed: %v", err) } + // Setup the indexes correctly + if existing != nil { + launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex + launch.ModifyIndex = index + } else { + launch.CreateIndex = index + launch.ModifyIndex = index + } + // Insert the job if err := txn.Insert("periodic_launch", launch); err != nil { return fmt.Errorf("launch insert failed: %v", err) @@ -439,10 +449,6 @@ func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { txn := s.db.Txn(true) defer txn.Abort() - watcher := watch.NewItems() - watcher.Add(watch.Item{Table: "periodic_launch"}) - watcher.Add(watch.Item{Job: jobID}) - // Lookup the launch existing, err := txn.First("periodic_launch", "id", jobID) if err != nil { @@ -452,6 +458,10 @@ func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { return fmt.Errorf("launch not found") } + watcher := 
watch.NewItems() + watcher.Add(watch.Item{Table: "periodic_launch"}) + watcher.Add(watch.Item{Job: jobID}) + // Delete the launch if err := txn.Delete("periodic_launch", existing); err != nil { return fmt.Errorf("launch delete failed: %v", err) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 18515481d..1e158949b 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -672,7 +672,7 @@ func TestStateStore_RestoreJob(t *testing.T) { func TestStateStore_UpsertPeriodicLaunch(t *testing.T) { state := testStateStore(t) job := mock.Job() - launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: job.ID, Launch: time.Now()} notify := setupNotifyTest( state, @@ -688,6 +688,12 @@ func TestStateStore_UpsertPeriodicLaunch(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } + if out.CreateIndex != 1000 { + t.Fatalf("bad: %#v", out) + } + if out.ModifyIndex != 1000 { + t.Fatalf("bad: %#v", out) + } if !reflect.DeepEqual(launch, out) { t.Fatalf("bad: %#v %#v", job, out) @@ -707,7 +713,7 @@ func TestStateStore_UpsertPeriodicLaunch(t *testing.T) { func TestStateStore_UpdateUpsertPeriodicLaunch(t *testing.T) { state := testStateStore(t) job := mock.Job() - launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: job.ID, Launch: time.Now()} notify := setupNotifyTest( state, @@ -719,7 +725,10 @@ func TestStateStore_UpdateUpsertPeriodicLaunch(t *testing.T) { t.Fatalf("err: %v", err) } - launch2 := &structs.PeriodicLaunch{job.ID, launch.Launch.Add(1 * time.Second)} + launch2 := &structs.PeriodicLaunch{ + ID: job.ID, + Launch: launch.Launch.Add(1 * time.Second), + } err = state.UpsertPeriodicLaunch(1001, launch2) if err != nil { t.Fatalf("err: %v", err) @@ -729,6 +738,12 @@ func TestStateStore_UpdateUpsertPeriodicLaunch(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } + if out.CreateIndex != 1000 { + t.Fatalf("bad: %#v", out) + } + if out.ModifyIndex != 1001 { + t.Fatalf("bad: %#v", out) + } if !reflect.DeepEqual(launch2, out) { t.Fatalf("bad: %#v %#v", launch2, out) @@ -748,7 +763,7 @@ func TestStateStore_UpdateUpsertPeriodicLaunch(t *testing.T) { func TestStateStore_DeletePeriodicLaunch(t *testing.T) { state := testStateStore(t) job := mock.Job() - launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: job.ID, Launch: time.Now()} notify := setupNotifyTest( state, @@ -791,7 +806,7 @@ func TestStateStore_PeriodicLaunches(t *testing.T) { for i := 0; i < 10; i++ { job := mock.Job() - launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: job.ID, Launch: time.Now()} launches = append(launches, launch) err := state.UpsertPeriodicLaunch(1000+uint64(i), launch) @@ -840,7 +855,7 @@ func TestStateStore_PeriodicLaunches(t *testing.T) { func TestStateStore_RestorePeriodicLaunch(t *testing.T) { state := testStateStore(t) job := mock.Job() - launch := &structs.PeriodicLaunch{job.ID, time.Now()} + launch := &structs.PeriodicLaunch{ID: job.ID, Launch: time.Now()} notify := setupNotifyTest( state, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a33befbd3..0773e94fb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -939,7 +939,7 @@ const ( // PeriodicSpecTest is only used by unit tests. It is a sorted, comma // seperated list of unix timestamps at which to launch. 
- PeriodicSpecTest = "test" + PeriodicSpecTest = "_internal_test" ) // Periodic defines the interval a job should be run at. @@ -1021,6 +1021,10 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { type PeriodicLaunch struct { ID string // ID of the periodic job. Launch time.Time // The last launch time. + + // Raft Indexes + CreateIndex uint64 + ModifyIndex uint64 } var ( From b5d59c8d898ea603f0b8c7ab5106b8cb42c648af Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 13:55:50 -0800 Subject: [PATCH 28/45] Fix test --- nomad/periodic_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index 47604a17f..b9e397080 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -180,7 +180,7 @@ func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { t.Fatalf("Add didn't update: %v", tracked) } - if !reflect.DeepEqual(*job, tracked[0]) { + if !reflect.DeepEqual(job, tracked[0]) { t.Fatalf("Add didn't properly update: got %v; want %v", tracked[0], job) } } From 8165c1fc22880d2ad7da6570123fa140def8d3fc Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 14:14:55 -0800 Subject: [PATCH 29/45] Improve restorePeriodicDispatcher documentation (and bug fix) --- nomad/leader.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index f1ec1a3b3..5969a9239 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -1,6 +1,7 @@ package nomad import ( + "errors" "fmt" "time" @@ -192,20 +193,28 @@ func (s *Server) restorePeriodicDispatcher() error { job := i.(*structs.Job) s.periodicDispatcher.Add(job) + // If the periodic job has never been launched before, launch will hold + // the time the periodic job was added. Otherwise it has the last launch + // time of the periodic job. launch, err := s.fsm.State().PeriodicLaunchByID(job.ID) if err != nil || launch == nil { return fmt.Errorf("failed to get periodic launch time: %v", err) } - if !job.Periodic.Next(launch.Launch).Before(now) { + // nextLaunch is the next launch that should occur. + nextLaunch := job.Periodic.Next(launch.Launch) + + // We skip force launching the job if there should be no next launch + // (the zero case) or if the next launch time is in the future. If it is + // in the future, it will be handled by the periodic dispatcher. 
+ if nextLaunch.IsZero() || !nextLaunch.Before(now) { continue } if err := s.periodicDispatcher.ForceRun(job.ID); err != nil { - s.logger.Printf( - "[ERR] nomad.periodic: force run of periodic job %q failed: %v", - job.ID, err) - return fmt.Errorf("failed for force run periodic job %q: %v", job.ID, err) + msg := fmt.Sprintf("force run of periodic job %q failed: %v", job.ID, err) + s.logger.Printf("[ERR] nomad.periodic: %s", msg) + return errors.New(msg) } s.logger.Printf("[DEBUG] nomad.periodic: periodic job %q force"+ " run during leadership establishment", job.ID) From a60783a4ca068b9967b34d0e239199d32aa8d0de Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 17:07:50 -0800 Subject: [PATCH 30/45] Simplify run function and add nextLaunch test --- nomad/periodic.go | 141 +++++++++++++++++++++++------------------ nomad/periodic_test.go | 55 ++++++++++++++++ 2 files changed, 133 insertions(+), 63 deletions(-) diff --git a/nomad/periodic.go b/nomad/periodic.go index 08f45b91f..0d9e1547e 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -75,6 +75,7 @@ func (p *PeriodicDispatch) SetEnabled(enabled bool) { if p.running { close(p.stopCh) <-p.waitCh + p.running = false } p.Flush() } @@ -203,29 +204,90 @@ func (p *PeriodicDispatch) ForceRun(jobID string) error { return p.createEval(job, time.Now()) } -// run is a long-lived function that waits til a job's periodic spec is met and +// shouldRun returns whether the long lived run function should run. +func (p *PeriodicDispatch) shouldRun() bool { + p.l.RLock() + defer p.l.RUnlock() + return p.enabled && p.running +} + +// run is a long-lived function that waits till a job's periodic spec is met and // then creates an evaluation to run the job. func (p *PeriodicDispatch) run() { defer close(p.waitCh) - - // Do nothing if not enabled. - p.l.RLock() - if !p.enabled { - p.l.RUnlock() - return - } - p.l.RUnlock() - var now time.Time + for p.shouldRun() { + job, launch, err := p.nextLaunch() + if err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) + return + } else if job == nil { + return + } + now = time.Now() + p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launch.Sub(now)) + + select { + case <-p.stopCh: + return + case <-p.updateCh: + continue + case <-time.After(launch.Sub(now)): + // Get the current time so that we don't miss any jobs will we're creating evals. + now = time.Now() + p.dispatch(launch, now) + } + } +} + +// dispatch scans the periodic jobs in order of launch time and creates +// evaluations for all jobs whose next launch time is equal to that of the +// passed launchTime. The now time is used to determine the next launch time for +// the dispatched jobs. +func (p *PeriodicDispatch) dispatch(launchTime time.Time, now time.Time) { + p.l.Lock() + defer p.l.Unlock() + + // Create evals for all the jobs with the same launch time. 
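+	// The heap is ordered by next launch time, so we keep dispatching until the
+	// job at the top of the heap is scheduled for a different time.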
+ for { + if p.heap.Length() == 0 { + return + } + + j, err := p.heap.Peek() + if err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) + return + } + + if j.next != launchTime { + return + } + + if err := p.heap.Update(j.job, j.job.Periodic.Next(now)); err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", j.job.ID, err) + } + + p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", j.job.ID, launchTime) + go p.createEval(j.job, launchTime) + } +} + +// nextLaunch returns the next job to launch and when it should be launched. If +// the next job can't be determined, an error is returned. If the dispatcher is +// stopped, a nil job will be returned. +func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time, error) { PICK: // If there is nothing wait for an update. p.l.RLock() if p.heap.Length() == 0 { p.l.RUnlock() + + // Block until there is an update, or the dispatcher is stopped. select { case <-p.stopCh: - return + return nil, time.Time{}, nil case <-p.updateCh: } p.l.RLock() @@ -236,70 +298,23 @@ PICK: if err != nil { select { case <-p.stopCh: - return + return nil, time.Time{}, nil default: - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) - return + return nil, time.Time{}, err } } - launchTime := nextJob.next - // If there are only invalid times, wait for an update. - if launchTime.IsZero() { - p.logger.Printf("[DEBUG] nomad.periodic: job %q has no valid launch time", nextJob.job.ID) + if nextJob.next.IsZero() { select { case <-p.stopCh: - return + return nil, time.Time{}, nil case <-p.updateCh: goto PICK } } - now = time.Now() - p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", - nextJob.job.ID, nextJob.next.Sub(now)) - - select { - case <-p.stopCh: - return - case <-p.updateCh: - goto PICK - case <-time.After(nextJob.next.Sub(now)): - // Get the current time so that we don't miss any jobs will we are - // creating evals. - nowUpdate := time.Now() - - // Create evals for all the jobs with the same launch time. - p.l.Lock() - for { - if p.heap.Length() == 0 { - break - } - - j, err := p.heap.Peek() - if err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) - break - } - - if j.next != launchTime { - break - } - - if err := p.heap.Update(j.job, j.job.Periodic.Next(nowUpdate)); err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", j.job.ID, err) - } - - p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", j.job.ID, launchTime) - go p.createEval(j.job, launchTime) - } - - p.l.Unlock() - now = nowUpdate - } - - goto PICK + return nextJob.job, nextJob.next, nil } // createEval instantiates a job based on the passed periodic job and submits an diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index b9e397080..4cde0b3f9 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -516,6 +516,61 @@ func TestPeriodicDispatch_Complex(t *testing.T) { } } +func TestPeriodicDispatch_NextLaunch(t *testing.T) { + t.Parallel() + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Create two job that will be launched at the same time. 
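+	// The first job's only launch time is the Unix epoch, which can never fire
+	// again, so nextLaunch should block until the second, valid job is added.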
+ invalid := time.Unix(0, 0) + expected := time.Now().Round(1 * time.Second).Add(1 * time.Second) + job := testPeriodicJob(invalid) + job2 := testPeriodicJob(expected) + + // Make sure the periodic dispatcher isn't running. + close(s1.periodicDispatcher.stopCh) + s1.periodicDispatcher.stopCh = make(chan struct{}) + + // Run nextLaunch. + timeout := make(chan struct{}) + var j *structs.Job + var launch time.Time + var err error + go func() { + j, launch, err = s1.periodicDispatcher.nextLaunch() + close(timeout) + }() + + // Add them. + if err := s1.periodicDispatcher.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + // Delay adding a valid job. + time.Sleep(200 * time.Millisecond) + if err := s1.periodicDispatcher.Add(job2); err != nil { + t.Fatalf("Add failed %v", err) + } + + select { + case <-time.After(2 * time.Second): + t.Fatal("timeout") + case <-timeout: + if err != nil { + t.Fatalf("nextLaunch() failed: %v", err) + } + if j != job2 { + t.Fatalf("Incorrect job returned; got %v; want %v", j, job2) + } + if launch != expected { + t.Fatalf("Incorrect launch time; got %v; want %v", launch, expected) + } + } +} + func shuffle(jobs []*structs.Job) { rand.Seed(time.Now().Unix()) for i := range jobs { From b3e87b671968d32fba118b6bd91a7255f58ccc5b Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 18 Dec 2015 17:26:05 -0800 Subject: [PATCH 31/45] Remove the periodicRunner interface and pass the server as an interface to the periodicDispatcher --- nomad/fsm.go | 36 ++-- nomad/fsm_test.go | 7 +- nomad/leader_test.go | 38 ++-- nomad/periodic.go | 122 ++++++------ nomad/periodic_test.go | 390 ++++++++++++++------------------------- nomad/server.go | 2 +- nomad/structs/structs.go | 4 +- 7 files changed, 243 insertions(+), 356 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 334957754..f9c43f7f4 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -39,12 +39,12 @@ const ( // along with Raft to provide strong consistency. We implement // this outside the Server to avoid exposing this outside the package. 
type nomadFSM struct { - evalBroker *EvalBroker - periodicRunner PeriodicRunner - logOutput io.Writer - logger *log.Logger - state *state.StateStore - timetable *TimeTable + evalBroker *EvalBroker + periodicDispatcher *PeriodicDispatch + logOutput io.Writer + logger *log.Logger + state *state.StateStore + timetable *TimeTable } // nomadSnapshot is used to provide a snapshot of the current @@ -60,7 +60,7 @@ type snapshotHeader struct { } // NewFSMPath is used to construct a new FSM with a blank state -func NewFSM(evalBroker *EvalBroker, periodic PeriodicRunner, logOutput io.Writer) (*nomadFSM, error) { +func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch, logOutput io.Writer) (*nomadFSM, error) { // Create a state store state, err := state.NewStateStore(logOutput) if err != nil { @@ -68,12 +68,12 @@ func NewFSM(evalBroker *EvalBroker, periodic PeriodicRunner, logOutput io.Writer } fsm := &nomadFSM{ - evalBroker: evalBroker, - periodicRunner: periodic, - logOutput: logOutput, - logger: log.New(logOutput, "", log.LstdFlags), - state: state, - timetable: NewTimeTable(timeTableGranularity, timeTableLimit), + evalBroker: evalBroker, + periodicDispatcher: periodic, + logOutput: logOutput, + logger: log.New(logOutput, "", log.LstdFlags), + state: state, + timetable: NewTimeTable(timeTableGranularity, timeTableLimit), } return fsm, nil } @@ -211,8 +211,8 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { // If it is periodic, insert it into the periodic runner and record the // time it was inserted. if req.Job.IsPeriodic() { - if err := n.periodicRunner.Add(req.Job); err != nil { - n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Add failed: %v", err) + if err := n.periodicDispatcher.Add(req.Job); err != nil { + n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) return err } @@ -237,7 +237,7 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { } if parent.IsPeriodic() { - t, err := n.periodicRunner.LaunchTime(req.Job.ID) + t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) if err != nil { n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) return err @@ -272,8 +272,8 @@ func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { } if job.IsPeriodic() { - if err := n.periodicRunner.Remove(req.JobID); err != nil { - n.logger.Printf("[ERR] nomad.fsm: PeriodicRunner.Remove failed: %v", err) + if err := n.periodicDispatcher.Remove(req.JobID); err != nil { + n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) return err } diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index c1bf4dfa5..ef76486a8 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -43,7 +43,8 @@ func testStateStore(t *testing.T) *state.StateStore { } func testFSM(t *testing.T) *nomadFSM { - fsm, err := NewFSM(testBroker(t, 0), NewMockPeriodic(), os.Stderr) + p, _ := testPeriodicDispatcher() + fsm, err := NewFSM(testBroker(t, 0), p, os.Stderr) if err != nil { t.Fatalf("err: %v", err) } @@ -249,7 +250,7 @@ func TestFSM_RegisterJob(t *testing.T) { } // Verify it was added to the periodic runner. - if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; !ok { + if _, ok := fsm.periodicDispatcher.tracked[job.ID]; !ok { t.Fatal("job not added to periodic runner") } @@ -306,7 +307,7 @@ func TestFSM_DeregisterJob(t *testing.T) { } // Verify it was removed from the periodic runner. 
- if _, ok := fsm.periodicRunner.(*MockPeriodic).Jobs[job.ID]; ok { + if _, ok := fsm.periodicDispatcher.tracked[job.ID]; ok { t.Fatal("job not removed from periodic runner") } diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 4c035efcd..6dab28cae 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -391,6 +391,10 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { // Sleep till after the job should have been launched. time.Sleep(3 * time.Second) + // Get the current time to ensure the launch time is after this once we + // restore. + now := time.Now() + // Restore the periodic dispatcher. s1.periodicDispatcher.SetEnabled(true) s1.periodicDispatcher.Start() @@ -402,13 +406,13 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { } // Check that an eval was made. - evals, err := createdEvals(s1.periodicDispatcher, job.ID) - if err != nil { - t.Fatalf("createdEvals(%v) failed: %v", job.ID, err) + last, err := s1.fsm.State().PeriodicLaunchByID(job.ID) + if err != nil || last == nil { + t.Fatalf("failed to get periodic launch time: %v", err) } - if len(evals) != 1 { - t.Fatalf("restorePeriodicDispatcher() didn't create an eval") + if last.Launch.Before(now) { + t.Fatalf("restorePeriodicDispatcher did not force launch") } } @@ -453,26 +457,12 @@ func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { } // Check that an eval was made. - evals, err := createdEvals(s1.periodicDispatcher, job.ID) - if err != nil { - t.Fatalf("createdEvals(%v) failed: %v", job.ID, err) + last, err := s1.fsm.State().PeriodicLaunchByID(job.ID) + if err != nil || last == nil { + t.Fatalf("failed to get periodic launch time: %v", err) } - - if len(evals) != 2 { - t.Fatalf("restorePeriodicDispatcher() didn't create an eval") - } - - // Check it was for the right time. - match := false - for _, eval := range evals { - if eval.JobLaunch != past && eval.JobLaunch != future { - match = true - break - } - } - - if !match { - t.Fatal("restorePeriodicDispatcher() didn't create the correct eval") + if last.Launch == past { + t.Fatalf("restorePeriodicDispatcher did not force launch") } } diff --git a/nomad/periodic.go b/nomad/periodic.go index 0d9e1547e..36678d342 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -19,26 +19,13 @@ const ( JobLaunchSuffix = "/periodic-" ) -// PeriodicRunner is the interface for tracking and launching periodic jobs at -// their periodic spec. -type PeriodicRunner interface { - Start() - SetEnabled(enabled bool) - Add(job *structs.Job) error - Remove(jobID string) error - ForceRun(jobID string) error - Tracked() []*structs.Job - Flush() - LaunchTime(jobID string) (time.Time, error) -} - // PeriodicDispatch is used to track and launch periodic jobs. It maintains the // set of periodic jobs and creates derived jobs and evaluations per // instantiation which is determined by the periodic spec. type PeriodicDispatch struct { - srv *Server - enabled bool - running bool + dispatcher JobEvalDispatcher + enabled bool + running bool tracked map[string]*structs.Job heap *periodicHeap @@ -50,17 +37,60 @@ type PeriodicDispatch struct { l sync.RWMutex } +// JobEvalDispatcher is an interface to submit jobs and have evaluations created +// for them. +type JobEvalDispatcher interface { + // DispatchJob takes a job a new, untracked job and creates an evaluation + // for it. + DispatchJob(job *structs.Job) error +} + +// DispatchJob creates an evaluation for the passed job and commits both the +// evaluation and the job to the raft log. 
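+// It first applies the job registration, then submits a pending evaluation
+// that references the resulting job modify index.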
+func (s *Server) DispatchJob(job *structs.Job) error { + // Commit this update via Raft + req := structs.JobRegisterRequest{Job: job} + _, index, err := s.raftApply(structs.JobRegisterRequestType, req) + if err != nil { + return err + } + + // Create a new evaluation + eval := &structs.Evaluation{ + ID: structs.GenerateUUID(), + Priority: job.Priority, + Type: job.Type, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + JobModifyIndex: index, + Status: structs.EvalStatusPending, + } + update := &structs.EvalUpdateRequest{ + Evals: []*structs.Evaluation{eval}, + } + + // Commit this evaluation via Raft + // XXX: There is a risk of partial failure where the JobRegister succeeds + // but that the EvalUpdate does not. + _, _, err = s.raftApply(structs.EvalUpdateRequestType, update) + if err != nil { + return err + } + + return nil +} + // NewPeriodicDispatch returns a periodic dispatcher that is used to track and // launch periodic jobs. -func NewPeriodicDispatch(srv *Server) *PeriodicDispatch { +func NewPeriodicDispatch(logger *log.Logger, dispatcher JobEvalDispatcher) *PeriodicDispatch { return &PeriodicDispatch{ - srv: srv, - tracked: make(map[string]*structs.Job), - heap: NewPeriodicHeap(), - updateCh: make(chan struct{}, 1), - stopCh: make(chan struct{}), - waitCh: make(chan struct{}), - logger: srv.logger, + dispatcher: dispatcher, + tracked: make(map[string]*structs.Job), + heap: NewPeriodicHeap(), + updateCh: make(chan struct{}, 1), + stopCh: make(chan struct{}), + waitCh: make(chan struct{}), + logger: logger, } } @@ -121,7 +151,7 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { p.removeLocked(job.ID) } - // If the job is diabled and we aren't tracking it, do nothing. + // If the job is disabled and we aren't tracking it, do nothing. return nil } @@ -219,7 +249,11 @@ func (p *PeriodicDispatch) run() { for p.shouldRun() { job, launch, err := p.nextLaunch() if err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) + p.l.RLock() + defer p.l.RUnlock() + if !p.running { + p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) + } return } else if job == nil { return @@ -325,34 +359,8 @@ func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) return err } - // Commit this update via Raft - req := structs.JobRegisterRequest{Job: derived} - _, index, err := p.srv.raftApply(structs.JobRegisterRequestType, req) - if err != nil { - p.logger.Printf("[ERR] nomad.periodic: registering child job for periodic job %q failed: %v", periodicJob.ID, err) - return err - } - - // Create a new evaluation - eval := &structs.Evaluation{ - ID: structs.GenerateUUID(), - Priority: derived.Priority, - Type: derived.Type, - TriggeredBy: structs.EvalTriggerJobRegister, - JobID: derived.ID, - JobModifyIndex: index, - Status: structs.EvalStatusPending, - } - update := &structs.EvalUpdateRequest{ - Evals: []*structs.Evaluation{eval}, - } - - // Commit this evaluation via Raft - // XXX: There is a risk of partial failure where the JobRegister succeeds - // but that the EvalUpdate does not. 
- _, _, err = p.srv.raftApply(structs.EvalUpdateRequestType, update) - if err != nil { - p.logger.Printf("[ERR] nomad.periodic: creating eval for %q failed: %v", derived.ID, err) + if err := p.dispatcher.DispatchJob(derived); err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to dispatch job %q: %v", periodicJob.ID, err) return err } @@ -361,7 +369,6 @@ func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) // deriveJob instantiates a new job based on the passed periodic job and the // launch time. -// TODO: these jobs need to be marked as GC'able func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( derived *structs.Job, err error) { @@ -384,13 +391,14 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( derived.ID = p.derivedJobID(periodicJob, time) derived.Name = derived.ID derived.Periodic = nil + derived.GC = true return } // deriveJobID returns a job ID based on the parent periodic job and the launch // time. func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { - return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.Unix()) + return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.UnixNano()) } // LaunchTime returns the launch time of the job. This is only valid for @@ -406,7 +414,7 @@ func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) } - return time.Unix(int64(launch), 0), nil + return time.Unix(0, int64(launch)), nil } // Flush clears the state of the PeriodicDispatcher diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index 4cde0b3f9..c5f1510d4 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -2,150 +2,98 @@ package nomad import ( "fmt" + "log" "math/rand" + "os" "reflect" "sort" "strconv" "strings" + "sync" "testing" "time" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" ) -// MockPeriodic can be used by other tests that want to mock the periodic -// dispatcher. -type MockPeriodic struct { - Enabled bool - Jobs map[string]*structs.Job +type MockJobEvalDispatcher struct { + Jobs map[string]*structs.Job + lock sync.Mutex } -func NewMockPeriodic() *MockPeriodic { - return &MockPeriodic{Jobs: make(map[string]*structs.Job)} +func NewMockJobEvalDispatcher() *MockJobEvalDispatcher { + return &MockJobEvalDispatcher{Jobs: make(map[string]*structs.Job)} } -func (m *MockPeriodic) SetEnabled(enabled bool) { - m.Enabled = enabled -} - -func (m *MockPeriodic) Add(job *structs.Job) error { - if job == nil { - return fmt.Errorf("Must pass non nil job") - } - +func (m *MockJobEvalDispatcher) DispatchJob(job *structs.Job) error { + m.lock.Lock() + defer m.lock.Unlock() m.Jobs[job.ID] = job return nil } -func (m *MockPeriodic) Remove(jobID string) error { - delete(m.Jobs, jobID) - return nil -} - -func (m *MockPeriodic) ForceRun(jobID string) error { - return nil -} - -func (m *MockPeriodic) LaunchTime(jobID string) (time.Time, error) { - return time.Time{}, nil -} - -func (m *MockPeriodic) Start() {} - -func (m *MockPeriodic) Flush() { - m.Jobs = make(map[string]*structs.Job) -} - -func (m *MockPeriodic) Tracked() []*structs.Job { - tracked := make([]*structs.Job, len(m.Jobs)) - i := 0 +// LaunchTimes returns the launch times of child jobs in sorted order. 
+func (m *MockJobEvalDispatcher) LaunchTimes(p *PeriodicDispatch, parentID string) ([]time.Time, error) { + m.lock.Lock() + defer m.lock.Unlock() + var launches []time.Time for _, job := range m.Jobs { - tracked[i] = job - i++ + if job.ParentID != parentID { + continue + } + + t, err := p.LaunchTime(job.ID) + if err != nil { + return nil, err + } + launches = append(launches, t) } - return tracked + sort.Sort(times(launches)) + return launches, nil } +type times []time.Time + +func (t times) Len() int { return len(t) } +func (t times) Swap(i, j int) { t[i], t[j] = t[j], t[i] } +func (t times) Less(i, j int) bool { return t[i].Before(t[j]) } + +// testPeriodicDispatcher returns an enabled PeriodicDispatcher which uses the +// MockJobEvalDispatcher. +func testPeriodicDispatcher() (*PeriodicDispatch, *MockJobEvalDispatcher) { + logger := log.New(os.Stderr, "", log.LstdFlags) + m := NewMockJobEvalDispatcher() + d := NewPeriodicDispatch(logger, m) + d.SetEnabled(true) + d.Start() + return d, m +} + +// testPeriodicJob is a helper that creates a periodic job that launches at the +// passed times. func testPeriodicJob(times ...time.Time) *structs.Job { job := mock.PeriodicJob() job.Periodic.SpecType = structs.PeriodicSpecTest l := make([]string, len(times)) for i, t := range times { - l[i] = strconv.Itoa(int(t.Unix())) + l[i] = strconv.Itoa(int(t.UnixNano())) } job.Periodic.Spec = strings.Join(l, ",") return job } -// createdEvals returns the set of evaluations created from the passed periodic -// job in sorted order, with the earliest job launch first. -func createdEvals(p *PeriodicDispatch, periodicJobID string) (PeriodicEvals, error) { - state := p.srv.fsm.State() - iter, err := state.ChildJobs(periodicJobID) - if err != nil { - return nil, fmt.Errorf("failed to look up children of job %v: %v", periodicJobID, err) - } - - var evals PeriodicEvals - for i := iter.Next(); i != nil; i = iter.Next() { - job := i.(*structs.Job) - childEvals, err := state.EvalsByJob(job.ID) - if err != nil { - return nil, fmt.Errorf("failed to look up evals for job %v: %v", job.ID, err) - } - - for _, eval := range childEvals { - launch, err := p.LaunchTime(eval.JobID) - if err != nil { - return nil, fmt.Errorf("failed to get launch time for eval %v: %v", eval, err) - } - - pEval := &PeriodicEval{ - Eval: eval, - JobLaunch: launch, - } - - evals = append(evals, pEval) - } - } - - // Return the sorted evals. - sort.Sort(evals) - return evals, nil -} - -// PeriodicEval stores the evaluation and launch time for an instantiated -// periodic job. -type PeriodicEval struct { - Eval *structs.Evaluation - JobLaunch time.Time -} - -// For sorting. 
-type PeriodicEvals []*PeriodicEval - -func (p PeriodicEvals) Len() int { return len(p) } -func (p PeriodicEvals) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p PeriodicEvals) Less(i, j int) bool { return p[i].JobLaunch.Before(p[j].JobLaunch) } - func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) - + p, _ := testPeriodicDispatcher() job := mock.Job() - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add of non-periodic job failed: %v; expect no-op", err) } - tracked := s1.periodicDispatcher.Tracked() + tracked := p.Tracked() if len(tracked) != 0 { t.Fatalf("Add of non-periodic job should be no-op: %v", tracked) } @@ -153,29 +101,24 @@ func TestPeriodicDispatch_Add_NonPeriodic(t *testing.T) { func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) - + p, _ := testPeriodicDispatcher() job := mock.PeriodicJob() - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } - tracked := s1.periodicDispatcher.Tracked() + tracked := p.Tracked() if len(tracked) != 1 { t.Fatalf("Add didn't track the job: %v", tracked) } // Update the job and add it again. job.Periodic.Spec = "foo" - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } - tracked = s1.periodicDispatcher.Tracked() + tracked = p.Tracked() if len(tracked) != 1 { t.Fatalf("Add didn't update: %v", tracked) } @@ -187,83 +130,70 @@ func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, m := testPeriodicDispatcher() // Create a job that won't be evalauted for a while. job := testPeriodicJob(time.Now().Add(10 * time.Second)) // Add it. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } // Update it to be sooner and re-add. expected := time.Now().Add(1 * time.Second) - job.Periodic.Spec = fmt.Sprintf("%d", expected.Unix()) - if err := s1.periodicDispatcher.Add(job); err != nil { + job.Periodic.Spec = fmt.Sprintf("%d", expected.UnixNano()) + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } + // Check that nothing is created. + if _, ok := m.Jobs[job.ID]; ok { + t.Fatalf("periodic dispatcher created eval at the wrong time") + } + time.Sleep(2 * time.Second) - // Check that an eval was created for the right time. - evals, err := createdEvals(s1.periodicDispatcher, job.ID) + // Check that job was launched correctly. 
+ times, err := m.LaunchTimes(p, job.ID) if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) + t.Fatalf("failed to get launch times for job %q", job.ID) } - - if len(evals) != 1 { - t.Fatalf("Unexpected number of evals created; got %#v; want 1", evals) + if len(times) != 1 { + t.Fatalf("incorrect number of launch times for job %q", job.ID) } - - eval := evals[0].Eval - expID := s1.periodicDispatcher.derivedJobID(job, expected) - if eval.JobID != expID { - t.Fatalf("periodic dispatcher created eval at the wrong time; got %v; want %v", - eval.JobID, expID) + if times[0] != expected { + t.Fatalf("periodic dispatcher created eval for time %v; want %v", times[0], expected) } } func TestPeriodicDispatch_Remove_Untracked(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) - - if err := s1.periodicDispatcher.Remove("foo"); err != nil { + p, _ := testPeriodicDispatcher() + if err := p.Remove("foo"); err != nil { t.Fatalf("Remove failed %v; expected a no-op", err) } } func TestPeriodicDispatch_Remove_Tracked(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, _ := testPeriodicDispatcher() job := mock.PeriodicJob() - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } - tracked := s1.periodicDispatcher.Tracked() + tracked := p.Tracked() if len(tracked) != 1 { t.Fatalf("Add didn't track the job: %v", tracked) } - if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + if err := p.Remove(job.ID); err != nil { t.Fatalf("Remove failed %v", err) } - tracked = s1.periodicDispatcher.Tracked() + tracked = p.Tracked() if len(tracked) != 0 { t.Fatalf("Remove didn't untrack the job: %v", tracked) } @@ -271,90 +201,71 @@ func TestPeriodicDispatch_Remove_Tracked(t *testing.T) { func TestPeriodicDispatch_Remove_TriggersUpdate(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, _ := testPeriodicDispatcher() // Create a job that will be evaluated soon. job := testPeriodicJob(time.Now().Add(1 * time.Second)) // Add it. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } // Remove the job. - if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + if err := p.Remove(job.ID); err != nil { t.Fatalf("Add failed %v", err) } time.Sleep(2 * time.Second) // Check that an eval wasn't created. 
- evals, err := createdEvals(s1.periodicDispatcher, job.ID) - if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) - } - - if len(evals) != 0 { - t.Fatalf("Remove didn't cancel creation of an eval: %#v", evals) + d := p.dispatcher.(*MockJobEvalDispatcher) + if _, ok := d.Jobs[job.ID]; ok { + t.Fatalf("Remove didn't cancel creation of an eval") } } func TestPeriodicDispatch_ForceRun_Untracked(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, _ := testPeriodicDispatcher() - if err := s1.periodicDispatcher.ForceRun("foo"); err == nil { + if err := p.ForceRun("foo"); err == nil { t.Fatal("ForceRun of untracked job should fail") } } func TestPeriodicDispatch_ForceRun_Tracked(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, m := testPeriodicDispatcher() // Create a job that won't be evalauted for a while. job := testPeriodicJob(time.Now().Add(10 * time.Second)) // Add it. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } // ForceRun the job - if err := s1.periodicDispatcher.ForceRun(job.ID); err != nil { + if err := p.ForceRun(job.ID); err != nil { t.Fatalf("ForceRun failed %v", err) } - // Check that an eval was created for the right time. - evals, err := createdEvals(s1.periodicDispatcher, job.ID) + // Check that job was launched correctly. + launches, err := m.LaunchTimes(p, job.ID) if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) + t.Fatalf("failed to get launch times for job %q: %v", job.ID, err) } - - if len(evals) != 1 { - t.Fatalf("Unexpected number of evals created; got %#v; want 1", evals) + l := len(launches) + if l != 1 { + t.Fatalf("restorePeriodicDispatcher() created an unexpected"+ + " number of evals; got %d; want 1", l) } } func TestPeriodicDispatch_Run_Multiple(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, m := testPeriodicDispatcher() // Create a job that will be launched twice. launch1 := time.Now().Add(1 * time.Second) @@ -362,34 +273,31 @@ func TestPeriodicDispatch_Run_Multiple(t *testing.T) { job := testPeriodicJob(launch1, launch2) // Add it. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } time.Sleep(3 * time.Second) - // Check that the evals were created correctly - evals, err := createdEvals(s1.periodicDispatcher, job.ID) + // Check that job was launched correctly. 
+ times, err := m.LaunchTimes(p, job.ID) if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) + t.Fatalf("failed to get launch times for job %q", job.ID) } - - d := s1.periodicDispatcher - expected := []string{d.derivedJobID(job, launch1), d.derivedJobID(job, launch2)} - for i, eval := range evals { - if eval.Eval.JobID != expected[i] { - t.Fatalf("eval created incorrectly; got %v; want %v", eval.Eval.JobID, expected[i]) - } + if len(times) != 2 { + t.Fatalf("incorrect number of launch times for job %q", job.ID) + } + if times[0] != launch1 { + t.Fatalf("periodic dispatcher created eval for time %v; want %v", times[0], launch1) + } + if times[1] != launch2 { + t.Fatalf("periodic dispatcher created eval for time %v; want %v", times[1], launch2) } } func TestPeriodicDispatch_Run_SameTime(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, m := testPeriodicDispatcher() // Create two job that will be launched at the same time. launch := time.Now().Add(1 * time.Second) @@ -397,30 +305,26 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { job2 := testPeriodicJob(launch) // Add them. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } - if err := s1.periodicDispatcher.Add(job2); err != nil { + if err := p.Add(job2); err != nil { t.Fatalf("Add failed %v", err) } time.Sleep(2 * time.Second) - // Check that the evals were created correctly + // Check that the jobs were launched correctly. for _, job := range []*structs.Job{job, job2} { - evals, err := createdEvals(s1.periodicDispatcher, job.ID) + times, err := m.LaunchTimes(p, job.ID) if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) + t.Fatalf("failed to get launch times for job %q", job.ID) } - - if len(evals) != 1 { - t.Fatalf("expected 1 eval; got %#v", evals) + if len(times) != 1 { + t.Fatalf("incorrect number of launch times for job %q; got %d; want 1", job.ID, len(times)) } - - d := s1.periodicDispatcher - expected := d.derivedJobID(job, launch) - if evals[0].Eval.JobID != expected { - t.Fatalf("eval created incorrectly; got %v; want %v", evals[0].Eval.JobID, expected) + if times[0] != launch { + t.Fatalf("periodic dispatcher created eval for time %v; want %v", times[0], launch) } } } @@ -430,11 +334,7 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { // behavior. func TestPeriodicDispatch_Complex(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, m := testPeriodicDispatcher() // Create some jobs launching at different times. now := time.Now() @@ -463,16 +363,12 @@ func TestPeriodicDispatch_Complex(t *testing.T) { job8 := testPeriodicJob(launch2) // Create a map of expected eval job ids. 
- d := s1.periodicDispatcher - expected := map[string][]string{ - job1.ID: []string{d.derivedJobID(job1, same)}, - job2.ID: []string{d.derivedJobID(job2, same)}, + expected := map[string][]time.Time{ + job1.ID: []time.Time{same}, + job2.ID: []time.Time{same}, job3.ID: nil, - job4.ID: []string{ - d.derivedJobID(job4, launch1), - d.derivedJobID(job4, launch3), - }, - job5.ID: []string{d.derivedJobID(job5, launch2)}, + job4.ID: []time.Time{launch1, launch3}, + job5.ID: []time.Time{launch2}, job6.ID: nil, job7.ID: nil, job8.ID: nil, @@ -485,54 +381,46 @@ func TestPeriodicDispatch_Complex(t *testing.T) { shuffle(toDelete) for _, job := range jobs { - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } } for _, job := range toDelete { - if err := s1.periodicDispatcher.Remove(job.ID); err != nil { + if err := p.Remove(job.ID); err != nil { t.Fatalf("Remove failed %v", err) } } time.Sleep(4 * time.Second) - actual := make(map[string][]string, len(expected)) + actual := make(map[string][]time.Time, len(expected)) for _, job := range jobs { - evals, err := createdEvals(s1.periodicDispatcher, job.ID) + launches, err := m.LaunchTimes(p, job.ID) if err != nil { - t.Fatalf("createdEvals(%v) failed %v", job.ID, err) + t.Fatalf("LaunchTimes(%v) failed %v", job.ID, err) } - var jobs []string - for _, eval := range evals { - jobs = append(jobs, eval.Eval.JobID) - } - actual[job.ID] = jobs + actual[job.ID] = launches } if !reflect.DeepEqual(actual, expected) { - t.Fatalf("Unexpected evals; got %#v; want %#v", actual, expected) + t.Fatalf("Unexpected launches; got %#v; want %#v", actual, expected) } } func TestPeriodicDispatch_NextLaunch(t *testing.T) { t.Parallel() - s1 := testServer(t, func(c *Config) { - c.NumSchedulers = 0 // Prevent automatic dequeue - }) - defer s1.Shutdown() - testutil.WaitForLeader(t, s1.RPC) + p, _ := testPeriodicDispatcher() // Create two job that will be launched at the same time. invalid := time.Unix(0, 0) - expected := time.Now().Round(1 * time.Second).Add(1 * time.Second) + expected := time.Now().Add(1 * time.Second) job := testPeriodicJob(invalid) job2 := testPeriodicJob(expected) // Make sure the periodic dispatcher isn't running. - close(s1.periodicDispatcher.stopCh) - s1.periodicDispatcher.stopCh = make(chan struct{}) + close(p.stopCh) + p.stopCh = make(chan struct{}) // Run nextLaunch. timeout := make(chan struct{}) @@ -540,18 +428,18 @@ func TestPeriodicDispatch_NextLaunch(t *testing.T) { var launch time.Time var err error go func() { - j, launch, err = s1.periodicDispatcher.nextLaunch() + j, launch, err = p.nextLaunch() close(timeout) }() // Add them. - if err := s1.periodicDispatcher.Add(job); err != nil { + if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } // Delay adding a valid job. time.Sleep(200 * time.Millisecond) - if err := s1.periodicDispatcher.Add(job2); err != nil { + if err := p.Add(job2); err != nil { t.Fatalf("Add failed %v", err) } diff --git a/nomad/server.go b/nomad/server.go index 3a1c25d83..3a17c8224 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -185,7 +185,7 @@ func NewServer(config *Config) (*Server, error) { } // Create the periodic dispatcher for launching periodic jobs. - s.periodicDispatcher = NewPeriodicDispatch(s) + s.periodicDispatcher = NewPeriodicDispatch(s.logger, s) // Initialize the RPC layer // TODO: TLS... 
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 0773e94fb..7f56fa169 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -938,7 +938,7 @@ const ( PeriodicSpecCron = "cron" // PeriodicSpecTest is only used by unit tests. It is a sorted, comma - // seperated list of unix timestamps at which to launch. + // seperated list of unix nanosecond timestamps at which to launch. PeriodicSpecTest = "_internal_test" ) @@ -1003,7 +1003,7 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { return time.Time{} } - times[i] = time.Unix(int64(unix), 0) + times[i] = time.Unix(0, int64(unix)) } // Find the next match From 3a400432dc38d0536eea4daab06e3502ef376d7c Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 21 Dec 2015 13:25:50 -0800 Subject: [PATCH 32/45] Always add jobs to periodic tracker --- nomad/fsm.go | 23 +++++++++++++++-------- nomad/periodic_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index f9c43f7f4..86ec519a7 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -208,15 +208,22 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { return err } - // If it is periodic, insert it into the periodic runner and record the - // time it was inserted. - if req.Job.IsPeriodic() { - if err := n.periodicDispatcher.Add(req.Job); err != nil { - n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) - return err - } + // We always add the job to the periodic dispatcher because there is the + // possibility that the periodic spec was removed and then we should stop + // tracking it. + if err := n.periodicDispatcher.Add(req.Job); err != nil { + n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) + return err + } - // Record the insertion time as a launch. + // If it is periodic, record the time it was inserted. This is necessary for + // recovering during leader election. It is possible that from the time it + // is added to when it was suppose to launch, leader election occurs and the + // job was not launched. In this case, we use the insertion time to + // determine if a launch was missed. + if req.Job.IsPeriodic() { + // Record the insertion time as a launch. We overload the launch table + // such that the first entry is the insertion time. launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index c5f1510d4..966a150a6 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -128,6 +128,31 @@ func TestPeriodicDispatch_Add_UpdateJob(t *testing.T) { } } +func TestPeriodicDispatch_Add_RemoveJob(t *testing.T) { + t.Parallel() + p, _ := testPeriodicDispatcher() + job := mock.PeriodicJob() + if err := p.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + tracked := p.Tracked() + if len(tracked) != 1 { + t.Fatalf("Add didn't track the job: %v", tracked) + } + + // Update the job to be non-periodic and add it again. 
+ job.Periodic = nil + if err := p.Add(job); err != nil { + t.Fatalf("Add failed %v", err) + } + + tracked = p.Tracked() + if len(tracked) != 0 { + t.Fatalf("Add didn't remove: %v", tracked) + } +} + func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { t.Parallel() p, m := testPeriodicDispatcher() From 6bc0737970f227b3536ba0b9f6a9aba9080f5c51 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 21 Dec 2015 13:55:26 -0800 Subject: [PATCH 33/45] Unix timestamps not UnixNano --- nomad/periodic.go | 4 ++-- nomad/periodic_test.go | 22 +++++++++++----------- nomad/structs/structs.go | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/nomad/periodic.go b/nomad/periodic.go index 36678d342..47a0f76c2 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -398,7 +398,7 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( // deriveJobID returns a job ID based on the parent periodic job and the launch // time. func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { - return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.UnixNano()) + return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.Unix()) } // LaunchTime returns the launch time of the job. This is only valid for @@ -414,7 +414,7 @@ func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) } - return time.Unix(0, int64(launch)), nil + return time.Unix(int64(launch), 0), nil } // Flush clears the state of the PeriodicDispatcher diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index 966a150a6..c2e395919 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -78,7 +78,7 @@ func testPeriodicJob(times ...time.Time) *structs.Job { l := make([]string, len(times)) for i, t := range times { - l[i] = strconv.Itoa(int(t.UnixNano())) + l[i] = strconv.Itoa(int(t.Round(1 * time.Second).Unix())) } job.Periodic.Spec = strings.Join(l, ",") @@ -166,8 +166,8 @@ func TestPeriodicDispatch_Add_TriggersUpdate(t *testing.T) { } // Update it to be sooner and re-add. - expected := time.Now().Add(1 * time.Second) - job.Periodic.Spec = fmt.Sprintf("%d", expected.UnixNano()) + expected := time.Now().Round(1 * time.Second).Add(1 * time.Second) + job.Periodic.Spec = fmt.Sprintf("%d", expected.Unix()) if err := p.Add(job); err != nil { t.Fatalf("Add failed %v", err) } @@ -293,8 +293,8 @@ func TestPeriodicDispatch_Run_Multiple(t *testing.T) { p, m := testPeriodicDispatcher() // Create a job that will be launched twice. - launch1 := time.Now().Add(1 * time.Second) - launch2 := time.Now().Add(2 * time.Second) + launch1 := time.Now().Round(1 * time.Second).Add(1 * time.Second) + launch2 := time.Now().Round(1 * time.Second).Add(2 * time.Second) job := testPeriodicJob(launch1, launch2) // Add it. @@ -325,7 +325,7 @@ func TestPeriodicDispatch_Run_SameTime(t *testing.T) { p, m := testPeriodicDispatcher() // Create two job that will be launched at the same time. - launch := time.Now().Add(1 * time.Second) + launch := time.Now().Round(1 * time.Second).Add(1 * time.Second) job := testPeriodicJob(launch) job2 := testPeriodicJob(launch) @@ -362,11 +362,11 @@ func TestPeriodicDispatch_Complex(t *testing.T) { p, m := testPeriodicDispatcher() // Create some jobs launching at different times. 
- now := time.Now() + now := time.Now().Round(1 * time.Second) same := now.Add(1 * time.Second) launch1 := same.Add(1 * time.Second) - launch2 := same.Add(1500 * time.Millisecond) - launch3 := same.Add(2 * time.Second) + launch2 := same.Add(2 * time.Second) + launch3 := same.Add(3 * time.Second) invalid := now.Add(-200 * time.Second) // Create two jobs launching at the same time. @@ -417,7 +417,7 @@ func TestPeriodicDispatch_Complex(t *testing.T) { } } - time.Sleep(4 * time.Second) + time.Sleep(5 * time.Second) actual := make(map[string][]time.Time, len(expected)) for _, job := range jobs { launches, err := m.LaunchTimes(p, job.ID) @@ -439,7 +439,7 @@ func TestPeriodicDispatch_NextLaunch(t *testing.T) { // Create two job that will be launched at the same time. invalid := time.Unix(0, 0) - expected := time.Now().Add(1 * time.Second) + expected := time.Now().Round(1 * time.Second).Add(1 * time.Second) job := testPeriodicJob(invalid) job2 := testPeriodicJob(expected) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 7f56fa169..0773e94fb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -938,7 +938,7 @@ const ( PeriodicSpecCron = "cron" // PeriodicSpecTest is only used by unit tests. It is a sorted, comma - // seperated list of unix nanosecond timestamps at which to launch. + // seperated list of unix timestamps at which to launch. PeriodicSpecTest = "_internal_test" ) @@ -1003,7 +1003,7 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { return time.Time{} } - times[i] = time.Unix(0, int64(unix)) + times[i] = time.Unix(int64(unix), 0) } // Find the next match From 8b76a0718ccbc491396901582c261eebc1120f48 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 21 Dec 2015 14:19:53 -0800 Subject: [PATCH 34/45] Make nomad status id aware of periodic jobs --- command/run.go | 6 +++--- command/status.go | 48 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/command/run.go b/command/run.go index d50098d16..f54f96298 100644 --- a/command/run.go +++ b/command/run.go @@ -94,7 +94,7 @@ func (c *RunCommand) Run(args []string) int { periodic := job.IsPeriodic() // Convert it to something we can use - apiJob, err := convertJob(job) + apiJob, err := convertStructJob(job) if err != nil { c.Ui.Error(fmt.Sprintf("Error converting job: %s", err)) return 1 @@ -132,9 +132,9 @@ func (c *RunCommand) Run(args []string) int { } -// convertJob is used to take a *structs.Job and convert it to an *api.Job. +// convertStructJob is used to take a *structs.Job and convert it to an *api.Job. // This function is just a hammer and probably needs to be revisited. 
-func convertJob(in *structs.Job) (*api.Job, error) { +func convertStructJob(in *structs.Job) (*api.Job, error) { gob.Register([]map[string]interface{}{}) gob.Register([]interface{}{}) var apiJob *api.Job diff --git a/command/status.go b/command/status.go index 4a736dc7a..de5128a64 100644 --- a/command/status.go +++ b/command/status.go @@ -1,8 +1,14 @@ package command import ( + "bytes" + "encoding/gob" "fmt" "strings" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/nomad/structs" ) type StatusCommand struct { @@ -93,6 +99,14 @@ func (c *StatusCommand) Run(args []string) int { return 1 } + // Check if it is periodic + sJob, err := convertApiJob(job) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error converting job: %s", err)) + return 1 + } + periodic := sJob.IsPeriodic() + // Format the job info basic := []string{ fmt.Sprintf("ID|%s", job.ID), @@ -101,10 +115,19 @@ func (c *StatusCommand) Run(args []string) int { fmt.Sprintf("Priority|%d", job.Priority), fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")), fmt.Sprintf("Status|%s", job.Status), + fmt.Sprintf("Periodic|%v", periodic), } - var evals, allocs []string - if !short { + if periodic { + basic = append(basic, fmt.Sprintf("Next Periodic Launch|%v", + sJob.Periodic.Next(time.Now()))) + } + + c.Ui.Output(formatKV(basic)) + + if !periodic && !short { + var evals, allocs []string + // Query the evaluations jobEvals, _, err := client.Jobs().Evaluations(jobID, nil) if err != nil { @@ -142,15 +165,28 @@ func (c *StatusCommand) Run(args []string) int { alloc.DesiredStatus, alloc.ClientStatus) } - } - // Dump the output - c.Ui.Output(formatKV(basic)) - if !short { c.Ui.Output("\n==> Evaluations") c.Ui.Output(formatList(evals)) c.Ui.Output("\n==> Allocations") c.Ui.Output(formatList(allocs)) } + return 0 } + +// convertApiJob is used to take a *api.Job and convert it to an *struct.Job. +// This function is just a hammer and probably needs to be revisited. 
+func convertApiJob(in *api.Job) (*structs.Job, error) { + gob.Register(map[string]interface{}{}) + gob.Register([]interface{}{}) + var structJob *structs.Job + buf := new(bytes.Buffer) + if err := gob.NewEncoder(buf).Encode(in); err != nil { + return nil, err + } + if err := gob.NewDecoder(buf).Decode(&structJob); err != nil { + return nil, err + } + return structJob, nil +} From e3231171b844428b8e07262653a6d0b130e22638 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 21 Dec 2015 15:02:27 -0800 Subject: [PATCH 35/45] Fix deadlock and test --- nomad/fsm.go | 7 +------ nomad/periodic.go | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 86ec519a7..fa66e9175 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -244,12 +244,7 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { } if parent.IsPeriodic() { - t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) - if err != nil { - n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) - return err - } - launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} + launch := &structs.PeriodicLaunch{ID: parentID, Launch: time.Now()} if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) return err diff --git a/nomad/periodic.go b/nomad/periodic.go index 47a0f76c2..fdaab824e 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -219,7 +219,6 @@ func (p *PeriodicDispatch) removeLocked(jobID string) error { // ForceRun causes the periodic job to be evaluated immediately. func (p *PeriodicDispatch) ForceRun(jobID string) error { p.l.Lock() - defer p.l.Unlock() // Do nothing if not enabled if !p.enabled { @@ -231,6 +230,7 @@ func (p *PeriodicDispatch) ForceRun(jobID string) error { return fmt.Errorf("can't force run non-tracked job %v", jobID) } + p.l.Unlock() return p.createEval(job, time.Now()) } From d073285982328d24d53bc602e67908228bd8c0be Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 16:31:54 -0800 Subject: [PATCH 36/45] use conditional index --- nomad/state/schema.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/nomad/state/schema.go b/nomad/state/schema.go index 0ff2ad58a..cc5899deb 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -123,8 +123,8 @@ func jobTableSchema() *memdb.TableSchema { Name: "periodic", AllowMissing: false, Unique: false, - Indexer: &memdb.FieldSetIndex{ - Field: "Periodic", + Indexer: &memdb.ConditionalIndex{ + Conditional: jobIsPeriodic, }, }, }, @@ -142,6 +142,21 @@ func jobIsGCable(obj interface{}) (bool, error) { return j.GC, nil } +// jobIsPeriodic satisfies the ConditionalIndexFunc interface and creates an index +// on whether a job is periodic. +func jobIsPeriodic(obj interface{}) (bool, error) { + j, ok := obj.(*structs.Job) + if !ok { + return false, fmt.Errorf("Unexpected type: %v", obj) + } + + if j.Periodic != nil && j.Periodic.Enabled == true { + return true, nil + } + + return false, nil +} + // periodicLaunchTableSchema returns the MemDB schema tracking the most recent // launch time for a perioidic job. 
func periodicLaunchTableSchema() *memdb.TableSchema { From e6f9a5bbb3fff5ec62badfdb3578ec8eaeec234d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 16:32:34 -0800 Subject: [PATCH 37/45] fix vet --- nomad/periodic.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/periodic.go b/nomad/periodic.go index fdaab824e..816d74857 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -165,7 +165,7 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { p.logger.Printf("[DEBUG] nomad.periodic: updated periodic job %q", job.ID) } else { if err := p.heap.Push(job, next); err != nil { - return fmt.Errorf("failed to add job %v", job.ID, err) + return fmt.Errorf("failed to add job %v: %v", job.ID, err) } p.logger.Printf("[DEBUG] nomad.periodic: registered periodic job %q", job.ID) } From 68506c7163f7c75592eb01dd177ff2c9502ade84 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 17:32:33 -0800 Subject: [PATCH 38/45] Remove parent index --- nomad/state/schema.go | 9 ------- nomad/state/state_store.go | 12 ---------- nomad/state/state_store_test.go | 42 --------------------------------- 3 files changed, 63 deletions(-) diff --git a/nomad/state/schema.go b/nomad/state/schema.go index cc5899deb..5f68d319f 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -110,15 +110,6 @@ func jobTableSchema() *memdb.TableSchema { Conditional: jobIsGCable, }, }, - "parent": &memdb.IndexSchema{ - Name: "parent", - AllowMissing: true, - Unique: false, - Indexer: &memdb.StringFieldIndex{ - Field: "ParentID", - Lowercase: true, - }, - }, "periodic": &memdb.IndexSchema{ Name: "periodic", AllowMissing: false, diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index d73616696..fb31a2971 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -359,18 +359,6 @@ func (s *StateStore) Jobs() (memdb.ResultIterator, error) { return iter, nil } -// ChildJobs returns an iterator over all the children of the passed job. -func (s *StateStore) ChildJobs(id string) (memdb.ResultIterator, error) { - txn := s.db.Txn(false) - - // Scan all jobs whose parent is the passed id. - iter, err := txn.Get("jobs", "parent", id) - if err != nil { - return nil, err - } - return iter, nil -} - // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 
func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { txn := s.db.Txn(false) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1e158949b..ee3a50a53 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -405,48 +405,6 @@ func TestStateStore_Jobs(t *testing.T) { } } -func TestStateStore_ChildJobs(t *testing.T) { - state := testStateStore(t) - parent := mock.Job() - var childJobs []*structs.Job - - if err := state.UpsertJob(999, parent); err != nil { - t.Fatalf("err: %v", err) - } - - for i := 0; i < 10; i++ { - job := mock.Job() - job.ParentID = parent.ID - childJobs = append(childJobs, job) - - err := state.UpsertJob(1000+uint64(i), job) - if err != nil { - t.Fatalf("err: %v", err) - } - } - - iter, err := state.ChildJobs(parent.ID) - if err != nil { - t.Fatalf("err: %v", err) - } - - var out []*structs.Job - for { - raw := iter.Next() - if raw == nil { - break - } - out = append(out, raw.(*structs.Job)) - } - - sort.Sort(JobIDSort(childJobs)) - sort.Sort(JobIDSort(out)) - - if !reflect.DeepEqual(childJobs, out) { - t.Fatalf("bad: %#v %#v", childJobs, out) - } -} - func TestStateStore_JobsByPeriodic(t *testing.T) { state := testStateStore(t) var periodic, nonPeriodic []*structs.Job From bf2aa9f733d983a4d258ef97e42aa762894c5699 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 17:47:37 -0800 Subject: [PATCH 39/45] Always remove periodic jobs in fsm --- nomad/fsm.go | 24 ++++++++---------------- nomad/periodic.go | 13 ++++++++----- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index fa66e9175..54a69584c 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -262,29 +262,21 @@ func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } - job, err := n.state.JobByID(req.JobID) - if err != nil { - n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) - return err - } - if err := n.state.DeleteJob(index, req.JobID); err != nil { n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) return err } - if job.IsPeriodic() { - if err := n.periodicDispatcher.Remove(req.JobID); err != nil { - n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) - return err - } - - if err := n.state.DeletePeriodicLaunch(index, req.JobID); err != nil { - n.logger.Printf("[ERR] nomad.fsm: DeletePeriodicLaunch failed: %v", err) - return err - } + if err := n.periodicDispatcher.Remove(req.JobID); err != nil { + n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) + return err } + // We always delete from the periodic launch table because it is possible that + // the job was updated to be non-perioidic, thus checking if it is periodic + // doesn't ensure we clean it up properly. 
+ n.state.DeletePeriodicLaunch(index, req.JobID) + return nil } diff --git a/nomad/periodic.go b/nomad/periodic.go index 816d74857..abb885580 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -197,11 +197,14 @@ func (p *PeriodicDispatch) removeLocked(jobID string) error { return nil } - if job, tracked := p.tracked[jobID]; tracked { - delete(p.tracked, jobID) - if err := p.heap.Remove(job); err != nil { - return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) - } + job, tracked := p.tracked[jobID] + if !tracked { + return nil + } + + delete(p.tracked, jobID) + if err := p.heap.Remove(job); err != nil { + return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) } // Signal an update. From 9acc05f732044cd191d915f0f1d5cbb376e8ee94 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 17:50:50 -0800 Subject: [PATCH 40/45] Fix error message --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 0773e94fb..73fc82485 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -973,7 +973,7 @@ func (p *PeriodicConfig) Validate() error { case PeriodicSpecTest: // No-op default: - return fmt.Errorf("Unknown specification type %q", p.SpecType) + return fmt.Errorf("Unknown periodic specification type %q", p.SpecType) } return nil From bff276806014ddfc82978760bbdfac2bd51e8949 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 18:22:16 -0800 Subject: [PATCH 41/45] Use desired launch time in periodic launch table --- nomad/fsm.go | 8 +++++++- nomad/leader_test.go | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 54a69584c..1c96cfdab 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -244,7 +244,13 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { } if parent.IsPeriodic() { - launch := &structs.PeriodicLaunch{ID: parentID, Launch: time.Now()} + t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) + return err + } + + launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) return err diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 6dab28cae..f3029815b 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -388,13 +388,13 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { // Flush the periodic dispatcher, ensuring that no evals will be created. s1.periodicDispatcher.SetEnabled(false) - // Sleep till after the job should have been launched. - time.Sleep(3 * time.Second) - // Get the current time to ensure the launch time is after this once we // restore. now := time.Now() + // Sleep till after the job should have been launched. + time.Sleep(3 * time.Second) + // Restore the periodic dispatcher. 
s1.periodicDispatcher.SetEnabled(true) s1.periodicDispatcher.Start() @@ -412,7 +412,7 @@ func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { } if last.Launch.Before(now) { - t.Fatalf("restorePeriodicDispatcher did not force launch") + t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now) } } From e87f3e6ca74f59625fc5bb14fbe1d459f0049079 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 18:54:51 -0800 Subject: [PATCH 42/45] Simplify periodic nextLaunch, dispatch and run --- nomad/periodic.go | 115 ++++++++++++----------------------------- nomad/periodic_test.go | 57 +------------------- 2 files changed, 33 insertions(+), 139 deletions(-) diff --git a/nomad/periodic.go b/nomad/periodic.go index abb885580..2377da226 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -2,7 +2,6 @@ package nomad import ( "container/heap" - "errors" "fmt" "log" "strconv" @@ -248,110 +247,60 @@ func (p *PeriodicDispatch) shouldRun() bool { // then creates an evaluation to run the job. func (p *PeriodicDispatch) run() { defer close(p.waitCh) - var now time.Time + var launchCh <-chan time.Time for p.shouldRun() { - job, launch, err := p.nextLaunch() - if err != nil { - p.l.RLock() - defer p.l.RUnlock() - if !p.running { - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) - } - return - } else if job == nil { - return + job, launch := p.nextLaunch() + if launch.IsZero() { + launchCh = nil + } else { + launchDur := launch.Sub(time.Now()) + launchCh = time.After(launchDur) + p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launchDur) } - now = time.Now() - p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launch.Sub(now)) - select { case <-p.stopCh: return case <-p.updateCh: continue - case <-time.After(launch.Sub(now)): - // Get the current time so that we don't miss any jobs will we're creating evals. - now = time.Now() - p.dispatch(launch, now) + case <-launchCh: + p.dispatch(job, launch) } } } -// dispatch scans the periodic jobs in order of launch time and creates -// evaluations for all jobs whose next launch time is equal to that of the -// passed launchTime. The now time is used to determine the next launch time for -// the dispatched jobs. -func (p *PeriodicDispatch) dispatch(launchTime time.Time, now time.Time) { +// dispatch creates an evaluation for the job and updates its next launchtime +// based on the passed launch time. +func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { p.l.Lock() defer p.l.Unlock() - // Create evals for all the jobs with the same launch time. 
- for { - if p.heap.Length() == 0 { - return - } - - j, err := p.heap.Peek() - if err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic job: %v", err) - return - } - - if j.next != launchTime { - return - } - - if err := p.heap.Update(j.job, j.job.Periodic.Next(now)); err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", j.job.ID, err) - } - - p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", j.job.ID, launchTime) - go p.createEval(j.job, launchTime) + nextLaunch := job.Periodic.Next(launchTime) + if err := p.heap.Update(job, nextLaunch); err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", job.ID, err) } + + p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", job.ID, launchTime) + p.createEval(job, launchTime) } // nextLaunch returns the next job to launch and when it should be launched. If // the next job can't be determined, an error is returned. If the dispatcher is // stopped, a nil job will be returned. -func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time, error) { -PICK: +func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time) { // If there is nothing wait for an update. p.l.RLock() + defer p.l.RUnlock() if p.heap.Length() == 0 { - p.l.RUnlock() - - // Block until there is an update, or the dispatcher is stopped. - select { - case <-p.stopCh: - return nil, time.Time{}, nil - case <-p.updateCh: - } - p.l.RLock() + return nil, time.Time{} } - nextJob, err := p.heap.Peek() - p.l.RUnlock() - if err != nil { - select { - case <-p.stopCh: - return nil, time.Time{}, nil - default: - return nil, time.Time{}, err - } + nextJob := p.heap.Peek() + if nextJob == nil { + return nil, time.Time{} } - // If there are only invalid times, wait for an update. - if nextJob.next.IsZero() { - select { - case <-p.stopCh: - return nil, time.Time{}, nil - case <-p.updateCh: - goto PICK - } - } - - return nextJob.job, nextJob.next, nil + return nextJob.job, nextJob.next } // createEval instantiates a job based on the passed periodic job and submits an @@ -461,22 +410,22 @@ func (p *periodicHeap) Push(job *structs.Job, next time.Time) error { return nil } -func (p *periodicHeap) Pop() (*periodicJob, error) { +func (p *periodicHeap) Pop() *periodicJob { if len(p.heap) == 0 { - return nil, errors.New("heap is empty") + return nil } pJob := heap.Pop(&p.heap).(*periodicJob) delete(p.index, pJob.job.ID) - return pJob, nil + return pJob } -func (p *periodicHeap) Peek() (periodicJob, error) { +func (p *periodicHeap) Peek() *periodicJob { if len(p.heap) == 0 { - return periodicJob{}, errors.New("heap is empty") + return nil } - return *(p.heap[0]), nil + return p.heap[0] } func (p *periodicHeap) Contains(job *structs.Job) bool { diff --git a/nomad/periodic_test.go b/nomad/periodic_test.go index c2e395919..0e7002265 100644 --- a/nomad/periodic_test.go +++ b/nomad/periodic_test.go @@ -433,57 +433,6 @@ func TestPeriodicDispatch_Complex(t *testing.T) { } } -func TestPeriodicDispatch_NextLaunch(t *testing.T) { - t.Parallel() - p, _ := testPeriodicDispatcher() - - // Create two job that will be launched at the same time. - invalid := time.Unix(0, 0) - expected := time.Now().Round(1 * time.Second).Add(1 * time.Second) - job := testPeriodicJob(invalid) - job2 := testPeriodicJob(expected) - - // Make sure the periodic dispatcher isn't running. - close(p.stopCh) - p.stopCh = make(chan struct{}) - - // Run nextLaunch. 
- timeout := make(chan struct{}) - var j *structs.Job - var launch time.Time - var err error - go func() { - j, launch, err = p.nextLaunch() - close(timeout) - }() - - // Add them. - if err := p.Add(job); err != nil { - t.Fatalf("Add failed %v", err) - } - - // Delay adding a valid job. - time.Sleep(200 * time.Millisecond) - if err := p.Add(job2); err != nil { - t.Fatalf("Add failed %v", err) - } - - select { - case <-time.After(2 * time.Second): - t.Fatal("timeout") - case <-timeout: - if err != nil { - t.Fatalf("nextLaunch() failed: %v", err) - } - if j != job2 { - t.Fatalf("Incorrect job returned; got %v; want %v", j, job2) - } - if launch != expected { - t.Fatalf("Incorrect launch time; got %v; want %v", launch, expected) - } - } -} - func shuffle(jobs []*structs.Job) { rand.Seed(time.Now().Unix()) for i := range jobs { @@ -512,11 +461,7 @@ func TestPeriodicHeap_Order(t *testing.T) { exp := []string{"j2", "j3", "j1"} var act []string for i := 0; i < 3; i++ { - pJob, err := h.Pop() - if err != nil { - t.Fatal(err) - } - + pJob := h.Pop() act = append(act, lookup[pJob.job]) } From 9244037bba133419857a48afd3ab74f54395cdd6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 23 Dec 2015 19:02:31 -0800 Subject: [PATCH 43/45] Only add periodic job insertion time once --- nomad/fsm.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 1c96cfdab..5fe4009a2 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -222,12 +222,20 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { // job was not launched. In this case, we use the insertion time to // determine if a launch was missed. if req.Job.IsPeriodic() { + prevLaunch, err := n.state.PeriodicLaunchByID(req.Job.ID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) + return err + } + // Record the insertion time as a launch. We overload the launch table // such that the first entry is the insertion time. 
- launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()}
- if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
- n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
- return err
+ if prevLaunch == nil {
+ launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()}
+ if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
+ n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
+ return err
+ }
 }
 }

From 5ac413bffd0b6561611ff241ac12a20e05c5e55e Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Wed, 23 Dec 2015 19:11:24 -0800
Subject: [PATCH 44/45] changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c62ba4fb9..7e9fbc7ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 ## 0.3.0 (UNRELEASED)

 IMPROVEMENTS:
+ * core: Periodic specification for jobs [GH-540]
 * core: Improved restart policy with more user configuration [GH-594]
 * core: Batch jobs are garbage collected from the Nomad Servers [GH-586]
 * driver/rkt: Add support for CPU/Memory isolation [GH-610]

From 0b29c2046d31627320ba2cead9c719a2d0605b2a Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Wed, 23 Dec 2015 19:44:42 -0800
Subject: [PATCH 45/45] Test debug log

---
 scheduler/generic_sched_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 33abef8a4..7af4c4fb2 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -405,6 +405,9 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {

 // Ensure all allocations placed
 if len(out) != 10 {
+ for _, alloc := range out {
+ t.Logf("%#v", alloc)
+ }
 t.Fatalf("bad: %#v", out)
 }
 h.AssertEvalStatus(t, structs.EvalStatusComplete)
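
Closing note: the refactor in these patches leaves PeriodicDispatch depending only on the small JobEvalDispatcher interface instead of *Server, which makes it usable in isolation. Below is a minimal sketch of that wiring, assuming it lives in the nomad package; the nullDispatcher type and newStandalonePeriodicDispatch helper are illustrative names invented for this sketch, while NewPeriodicDispatch, SetEnabled, Start, and DispatchJob come from the patches above.

package nomad

import (
	"log"
	"os"
	"sync"

	"github.com/hashicorp/nomad/nomad/structs"
)

// nullDispatcher is a stand-in JobEvalDispatcher: instead of committing the
// derived job and its evaluation via Raft, it records the jobs it was asked
// to dispatch.
type nullDispatcher struct {
	mu   sync.Mutex
	jobs []*structs.Job
}

// DispatchJob satisfies JobEvalDispatcher by recording the dispatched job.
func (d *nullDispatcher) DispatchJob(job *structs.Job) error {
	d.mu.Lock()
	defer d.mu.Unlock()
	d.jobs = append(d.jobs, job)
	return nil
}

// newStandalonePeriodicDispatch wires a PeriodicDispatch to the stand-in
// dispatcher, mirroring what testPeriodicDispatcher does in periodic_test.go.
func newStandalonePeriodicDispatch() (*PeriodicDispatch, *nullDispatcher) {
	logger := log.New(os.Stderr, "", log.LstdFlags)
	d := &nullDispatcher{}
	p := NewPeriodicDispatch(logger, d)
	p.SetEnabled(true)
	p.Start()
	return p, d
}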