From 33a5a72323a74efbc1bf246b48fd38a1a864aa3a Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 19 Mar 2018 10:40:36 -0500 Subject: [PATCH] Make suggested interval round to seconds, and more end to end test cases --- .../input/rescheduling_default.hcl | 21 +++++++++ .../input/rescheduling_system.hcl | 20 ++++++++ .../server_side_restarts_suite_test.go | 1 + e2e/rescheduling/server_side_restarts_test.go | 46 ++++++++++++++++--- nomad/structs/structs.go | 2 +- 5 files changed, 83 insertions(+), 7 deletions(-) create mode 100644 e2e/rescheduling/input/rescheduling_default.hcl create mode 100644 e2e/rescheduling/input/rescheduling_system.hcl diff --git a/e2e/rescheduling/input/rescheduling_default.hcl b/e2e/rescheduling/input/rescheduling_default.hcl new file mode 100644 index 000000000..56a829d7a --- /dev/null +++ b/e2e/rescheduling/input/rescheduling_default.hcl @@ -0,0 +1,21 @@ +job "test" { + datacenters = ["dc1"] + type = "service" + + group "t" { + count = 3 + task "t" { + driver = "raw_exec" + config { + command = "bash" + args = ["-c", "lol 5000"] + } + } + restart { + attempts = 0 + delay = "0s" + mode = "fail" + } + + } +} \ No newline at end of file diff --git a/e2e/rescheduling/input/rescheduling_system.hcl b/e2e/rescheduling/input/rescheduling_system.hcl new file mode 100644 index 000000000..91f95fbd5 --- /dev/null +++ b/e2e/rescheduling/input/rescheduling_system.hcl @@ -0,0 +1,20 @@ +job "test" { + datacenters = ["dc1"] + type = "system" + + group "t" { + count = 1 + task "t" { + driver = "raw_exec" + config { + command = "bash" + args = ["-c", "lol 5000"] + } + } + restart { + attempts = 0 + delay = "0s" + mode = "fail" + } + } +} \ No newline at end of file diff --git a/e2e/rescheduling/server_side_restarts_suite_test.go b/e2e/rescheduling/server_side_restarts_suite_test.go index 35eed99cc..0b4bfd2e4 100644 --- a/e2e/rescheduling/server_side_restarts_suite_test.go +++ b/e2e/rescheduling/server_side_restarts_suite_test.go @@ -9,6 +9,7 @@ import ( ) var integration = flag.Bool("integration", false, "run integration tests") +var slow = flag.Bool("slow", false, "runs slower integration tests") func TestServerSideRestarts(t *testing.T) { if !*integration { diff --git a/e2e/rescheduling/server_side_restarts_test.go b/e2e/rescheduling/server_side_restarts_test.go index 8efa0266e..cfbac498e 100644 --- a/e2e/rescheduling/server_side_restarts_test.go +++ b/e2e/rescheduling/server_side_restarts_test.go @@ -2,12 +2,16 @@ package rescheduling import ( "time" + "sort" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/jobspec" _ "github.com/hashicorp/nomad/jobspec" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/helper" ) var _ = Describe("Server Side Restart Tests", func() { @@ -28,6 +32,7 @@ var _ = Describe("Server Side Restart Tests", func() { for _, a := range allocs { ret = append(ret, a.ClientStatus) } + sort.Strings(ret) return ret } @@ -59,7 +64,7 @@ var _ = Describe("Server Side Restart Tests", func() { JustBeforeEach(func() { job, err = jobspec.ParseFile(specFile) Expect(err).ShouldNot(HaveOccurred()) - + job.ID = helper.StringToPtr(uuid.Generate()) resp, _, err := jobs.Register(job, nil) Expect(err).ShouldNot(HaveOccurred()) Expect(resp.EvalID).ShouldNot(BeEmpty()) @@ -84,20 +89,49 @@ var _ = Describe("Server Side Restart Tests", func() { }) }) + Context("System jobs should never be rescheduled", func() { + BeforeEach(func() { + specFile = "input/rescheduling_system.hcl" + }) + + It("Should have exactly one failed alloc", func() { + Eventually(allocStatuses, 10*time.Second, time.Second).Should(ConsistOf([]string{"failed"})) + }) + }) + + Context("Default Rescheduling", func() { + BeforeEach(func() { + specFile = "input/rescheduling_default.hcl" + }) + It("Should have exactly three allocs and all failed after 5 secs", func() { + Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"})) + }) + // wait until first exponential delay kicks in and rescheduling is attempted + It("Should have exactly six allocs and all failed after 35 secs", func() { + if !*slow { + Skip("Skipping slow test") + } + Eventually(allocStatuses, 35*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed", "failed", "failed", "failed"})) + }) + }) + Context("Reschedule attempts maxed out", func() { BeforeEach(func() { specFile = "input/rescheduling_fail.hcl" }) - // Expect 3 original plus 6 rescheduled allocs from 2 attempts - var expected []string - for i := 0; i < 9; i++ { - expected = append(expected, "failed") - } It("Should have all failed", func() { Eventually(allocStatuses, 6*time.Second, time.Second).ShouldNot( SatisfyAll(ContainElement("pending"), ContainElement("running"))) }) + Context("Updating job to change its version", func() { + It("Should have running allocs now", func() { + job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"} + _, _, err := jobs.Register(job, nil) + Expect(err).ShouldNot(HaveOccurred()) + Eventually(allocStatuses, 5*time.Second, time.Second).Should(ContainElement("running")) + }) + }) }) Context("Reschedule attempts succeeded", func() { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 0d6fd5c6f..6a4fbcf4b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2892,7 +2892,7 @@ func (r *ReschedulePolicy) validateDelayParams() error { multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) } - multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Minute), r.Attempts)) + multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts)) return mErr.ErrorOrNil() }