e2e: rescheduling tests

Ports the rescheduling tests (which aren't currently running in CI) into the
current test framework so that they run in the nightly E2E suite, and
exercises the new CLI helpers.
Tim Gross 2020-09-09 16:59:07 -04:00
parent 28e9bbbbf4
commit 294c7149a2
16 changed files with 523 additions and 343 deletions


@@ -22,6 +22,7 @@ import (
_ "github.com/hashicorp/nomad/e2e/nomad09upgrade"
_ "github.com/hashicorp/nomad/e2e/nomadexec"
_ "github.com/hashicorp/nomad/e2e/podman"
_ "github.com/hashicorp/nomad/e2e/rescheduling"
_ "github.com/hashicorp/nomad/e2e/spread"
_ "github.com/hashicorp/nomad/e2e/systemsched"
_ "github.com/hashicorp/nomad/e2e/taskevents"

134 e2e/rescheduling/helpers.go Normal file

@@ -0,0 +1,134 @@
package rescheduling
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os/exec"
"regexp"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/testutil"
)
// allocStatuses returns a slice of client statuses for all allocs of a job
func allocStatuses(f *framework.F, jobID string) []string {
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", "-all-allocs", jobID)
f.NoError(err, "nomad job status failed", err)
section, err := e2eutil.GetSection(out, "Allocations")
f.NoError(err, "could not find Allocations section", err)
allocs, err := e2eutil.ParseColumns(section)
f.NoError(err, "could not parse Allocations section", err)
statuses := []string{}
for _, alloc := range allocs {
statuses = append(statuses, alloc["Status"])
}
return statuses
}
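For reference, a minimal sketch of what this parsing pipeline consumes. The sample output below is hypothetical (trimmed, not captured from a real cluster), and the behavior of GetSection/ParseColumns is inferred from how the helpers above call them:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/e2e/e2eutil"
)

func main() {
	// Hypothetical `nomad job status` output, cut down to two sections.
	out := `Allocations
ID        Node ID   Task Group  Version  Desired  Status  Created    Modified
8a2b4c6d  1f2e3d4c  t1          0        run      failed  1m52s ago  11s ago

Evaluations
...`
	section, err := e2eutil.GetSection(out, "Allocations") // presumably the block under that header
	if err != nil {
		panic(err)
	}
	rows, err := e2eutil.ParseColumns(section) // presumably one map per row, keyed by column header
	if err != nil {
		panic(err)
	}
	for _, row := range rows {
		fmt.Println(row["Status"]) // "failed"
	}
}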
// allocStatusesRescheduled is a helper function that pulls
// out client statuses only from rescheduled allocs.
func allocStatusesRescheduled(f *framework.F, jobID string) []string {
out, err := e2eutil.Command("nomad", "job", "status", "-verbose", jobID)
f.NoError(err, "nomad job status failed", err)
section, err := e2eutil.GetSection(out, "Allocations")
f.NoError(err, "could not find Allocations section", err)
allocs, err := e2eutil.ParseColumns(section)
f.NoError(err, "could not parse Allocations section", err)
statuses := []string{}
for _, alloc := range allocs {
allocID := alloc["ID"]
// reschedule tracker isn't exposed in the normal CLI output
out, err := e2eutil.Command("nomad", "alloc", "status", "-json", allocID)
f.NoError(err, "nomad alloc status failed", err)
dec := json.NewDecoder(strings.NewReader(out))
alloc := &api.Allocation{}
err = dec.Decode(alloc)
f.NoError(err, "could not decode alloc status JSON: %w", err)
if (alloc.RescheduleTracker != nil &&
len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != "" {
statuses = append(statuses, alloc.ClientStatus)
}
}
return statuses
}
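The predicate above keys off two fields of the allocation: a non-empty reschedule tracker or a follow-up evaluation ID. A self-contained sketch of the same check against hand-written sample JSON (the IDs and values are made up for illustration):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Hand-written sample; field values are illustrative, not real cluster output.
	sample := `{
	  "ClientStatus": "failed",
	  "FollowupEvalID": "9f3e2b7a",
	  "RescheduleTracker": {"Events": [{"PrevAllocID": "8c2a4d6f"}]}
	}`
	alloc := &api.Allocation{}
	if err := json.Unmarshal([]byte(sample), alloc); err != nil {
		panic(err)
	}
	// Same predicate as allocStatusesRescheduled above.
	rescheduled := (alloc.RescheduleTracker != nil &&
		len(alloc.RescheduleTracker.Events) > 0) || alloc.FollowupEvalID != ""
	fmt.Println(rescheduled) // true
}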
// register is a helper that registers a jobspec under a unique job ID;
// callers record that ID on the testcase for later cleanup
func register(f *framework.F, jobFile, jobID string) {
cmd := exec.Command("nomad", "job", "run", "-")
stdin, err := cmd.StdinPipe()
f.NoError(err, fmt.Sprintf("could not open stdin?: %v", err))
content, err := ioutil.ReadFile(jobFile)
f.NoError(err, fmt.Sprintf("could not open job file: %v", err))
// swap out the job ID on the first line for our unique one
var re = regexp.MustCompile(`^job "\w+" \{`)
jobspec := re.ReplaceAllString(string(content),
fmt.Sprintf("job \"%s\" {", jobID))
go func() {
defer stdin.Close()
io.WriteString(stdin, jobspec)
}()
out, err := cmd.CombinedOutput()
f.NoError(err, "could not register job: %v\n%v", err, string(out))
}
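The first-line rewrite is easiest to see on a concrete jobspec header (the job IDs here are made up):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`^job "\w+" \{`)
	jobspec := `job "demo" {` // first line of a hypothetical jobspec file
	fmt.Println(re.ReplaceAllString(jobspec, `job "test-no-reschedule3f9a2b1c" {`))
	// Output: job "test-no-reschedule3f9a2b1c" {
}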
func waitForAllocStatusComparison(query func() ([]string, error), comparison func([]string) bool) error {
var got []string
var err error
testutil.WaitForResultRetries(30, func() (bool, error) {
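// 30 retries x 100ms sleep gives the cluster roughly 3s to converge
// (plus whatever per-retry delay WaitForResultRetries itself adds)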
time.Sleep(time.Millisecond * 100)
got, err = query()
if err != nil {
return false, err
}
return comparison(got), nil
}, func(e error) {
err = fmt.Errorf("alloc status check failed: got %#v", got)
})
return err
}
func waitForLastDeploymentStatus(f *framework.F, jobID, status string) error {
var got string
var err error
testutil.WaitForResultRetries(30, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
out, err := e2eutil.Command("nomad", "job", "status", jobID)
f.NoError(err, "could not get job status: %v\n%v", err, out)
section, err := e2eutil.GetSection(out, "Latest Deployment")
f.NoError(err, "could not find Latest Deployment section", err)
fields, err := e2eutil.ParseFields(section)
f.NoError(err, "could not parse Latest Deployment section", err)
got = fields["Status"]
return got == status, nil
}, func(e error) {
err = fmt.Errorf("deployment status check failed: got %#v", got)
})
return err
}
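Similarly, a sketch of the "Latest Deployment" block this helper reads. The sample text is approximated from typical `nomad job status` output, and ParseFields is assumed to split `key = value` lines (its signature is taken from the call above):

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/e2e/e2eutil"
)

func main() {
	section := `ID          = 1b3f5d2e
Status      = successful
Description = Deployment completed successfully`
	fields, err := e2eutil.ParseFields(section)
	if err != nil {
		panic(err)
	}
	fmt.Println(fields["Status"]) // "successful"
}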


@@ -0,0 +1,388 @@
package rescheduling
import (
"fmt"
"os"
"reflect"
"sort"
"time"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/jobspec"
)
type RescheduleE2ETest struct {
framework.TC
jobIds []string
}
func init() {
framework.AddSuites(&framework.TestSuite{
Component: "Rescheduling",
CanRunLocal: true,
Consul: true,
Cases: []framework.TestCase{
new(RescheduleE2ETest),
},
})
}
func (tc *RescheduleE2ETest) BeforeAll(f *framework.F) {
e2eutil.WaitForLeader(f.T(), tc.Nomad())
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}
func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
return
}
for _, id := range tc.jobIds {
_, err := e2eutil.Command("nomad", "job", "stop", "-purge", id)
f.NoError(err)
}
tc.jobIds = []string{}
_, err := e2eutil.Command("nomad", "system", "gc")
f.NoError(err)
}
// TestNoReschedule runs a job that should fail and never reschedule
func (tc *RescheduleE2ETest) TestNoReschedule(f *framework.F) {
jobID := "test-no-reschedule" + uuid.Generate()[0:8]
register(f, "rescheduling/input/norescheduling.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 failed allocs")
}
// TestNoRescheduleSystem runs a system job that should fail and never reschedule
func (tc *RescheduleE2ETest) TestNoRescheduleSystem(f *framework.F) {
jobID := "test-reschedule-system" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_system.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status != "failed" {
return false
}
}
return true
},
)
f.NoError(err, "should have only failed allocs")
}
// TestDefaultReschedule runs a job that should reschedule after delay
func (tc *RescheduleE2ETest) TestDefaultReschedule(f *framework.F) {
jobID := "test-default-reschedule" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_default.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 failed allocs")
// TODO(tgross): return early if "slow" isn't set
// wait until the first exponential delay (30s by default for service jobs)
// kicks in and rescheduling is attempted
time.Sleep(time.Second * 35)
expected = []string{"failed", "failed", "failed", "failed", "failed", "failed"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 6 failed allocs after 35s")
}
// TestRescheduleMaxAttempts runs a job that fails until it exhausts its
// maximum reschedule attempts, then updates it so that it can run
func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
jobID := "test-reschedule-fail" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_fail.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"failed", "failed", "failed"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 failed allocs")
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_fail.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
}
}
return false
},
)
f.NoError(err, "should have at least 1 running alloc")
}
// TestRescheduleSuccess runs a job that should be running after rescheduling
func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
jobID := "test-reschedule-success" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_success.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
for _, status := range got {
if status == "running" {
return true
}
}
return false
},
)
f.NoError(err, "should have at least 1 running alloc")
}
// TestRescheduleWithUpdate updates a running job to fail, and verifies that
// it gets rescheduled
func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
jobID := "test-reschedule-update" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_update.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running allocs")
// update the job so that the allocs fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_update.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) > 0 },
)
f.NoError(err, "should have rescheduled allocs until progress deadline")
}
// TestRescheduleWithCanary updates a running job to fail, and verifies that
// the canary gets rescheduled
func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
jobID := "test-reschedule-canary" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_canary.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
// update the job so that the allocs fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) > 0 },
)
f.NoError(err, "should have rescheduled allocs until progress deadline")
err = waitForLastDeploymentStatus(f, jobID, "running")
f.NoError(err, "deployment should be running")
}
// TestRescheduleWithCanaryAutoRevert updates a running job to fail, and
// verifies that the job gets reverted
func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F) {
jobID := "test-reschedule-canary-revert" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_canary_autorevert.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
// update the job so that the allocs fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary_autorevert.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) == 0 },
)
f.NoError(err, "should have new allocs after update")
// then we'll fail and revert
expected = []string{"failed", "failed", "failed", "running", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running reverted allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
}
// TestRescheduleMaxParallel updates a job with a max_parallel config
func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
jobID := "test-reschedule-maxp" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_maxp.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
// update the job so that the allocs fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
expected = []string{"complete", "failed", "failed", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
},
)
f.NoError(err, "should have failed allocs including rescheduled failed allocs")
err = waitForLastDeploymentStatus(f, jobID, "running")
f.NoError(err, "deployment should be running")
}
// TestRescheduleMaxParallelAutoRevert updates a job with a max_parallel
// config that will autorevert on failure
func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F) {
jobID := "test-reschedule-maxp-revert" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_maxp_autorevert.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
expected := []string{"running", "running", "running"}
err := waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool { return reflect.DeepEqual(got, expected) },
)
f.NoError(err, "should have exactly 3 running allocs")
err = waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
// update the job so that the allocs fail
job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp_autorevert.nomad")
f.NoError(err)
job.ID = &jobID
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err = tc.Nomad().Jobs().Register(job, nil)
f.NoError(err, "could not register updated job")
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatusesRescheduled(f, jobID), nil },
func(got []string) bool { return len(got) == 0 },
)
f.NoError(err, "should have new allocs after update")
// wait for the revert
expected = []string{"complete", "failed", "running", "running", "running"}
err = waitForAllocStatusComparison(
func() ([]string, error) { return allocStatuses(f, jobID), nil },
func(got []string) bool {
sort.Strings(got)
return reflect.DeepEqual(got, expected)
},
)
f.NoError(err, "should have one successful, one failed, and 3 reverted allocs")
out, err := e2eutil.Command("nomad", "deployment", "status")
f.NoError(err, "could not get deployment status")
results, err := e2eutil.ParseColumns(out)
f.NoError(err, "could not parse deployment status")
statuses := []string{}
for _, row := range results {
if row["Job ID"] == jobID {
statuses = append(statuses, row["Status"])
}
}
f.True(reflect.DeepEqual([]string{"running", "failed", "successful"}, statuses),
fmt.Sprintf("deployment status was: %#v", statuses),
)
}
// TestRescheduleProgressDeadline verifies a deployment succeeds by the
// progress deadline
func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
jobID := "test-reschedule-deadline" + uuid.Generate()[0:8]
register(f, "rescheduling/input/rescheduling_progressdeadline.nomad", jobID)
tc.jobIds = append(tc.jobIds, jobID)
// TODO(tgross): return early if "slow" isn't set
// wait for the progress deadline to pass and the deployment to have
// had time to finish rolling out
time.Sleep(time.Second * 30)
err := waitForLastDeploymentStatus(f, jobID, "successful")
f.NoError(err, "deployment should be successful")
}
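With the suite registered in e2e_test.go above, these cases should be runnable on their own against a dev cluster, along the lines of `go test -v -suite=Rescheduling ./e2e` (flag name assumed from the e2e framework's suite selector); setting NOMAD_TEST_SKIPCLEANUP=1 leaves the test jobs behind for debugging, per the AfterEach hook above.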


@@ -1,20 +0,0 @@
package rescheduling
import (
"flag"
"testing"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var integration = flag.Bool("integration", false, "run integration tests")
var slow = flag.Bool("slow", false, "runs slower integration tests")
func TestServerSideRestarts(t *testing.T) {
if !*integration {
t.Skip("skipping test in non-integration mode.")
}
RegisterFailHandler(Fail)
RunSpecs(t, "Server Side Restart Tests")
}


@@ -1,323 +0,0 @@
package rescheduling
import (
"sort"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/jobspec"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
)
var _ = Describe("Server Side Restart Tests", func() {
var (
jobs *api.Jobs
system *api.System
job *api.Job
err error
specFile string
// allocStatuses is a helper function that pulls
// out client statuses from a slice of allocs
allocStatuses = func() []string {
allocs, _, err := jobs.Allocations(*job.ID, false, nil)
Expect(err).ShouldNot(HaveOccurred())
var ret []string
for _, a := range allocs {
ret = append(ret, a.ClientStatus)
}
sort.Strings(ret)
return ret
}
// allocStatusesRescheduled is a helper function that pulls
// out client statuses only from rescheduled allocs
allocStatusesRescheduled = func() []string {
allocs, _, err := jobs.Allocations(*job.ID, false, nil)
Expect(err).ShouldNot(HaveOccurred())
var ret []string
for _, a := range allocs {
if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID != "" {
ret = append(ret, a.ClientStatus)
}
}
return ret
}
// deploymentStatus is a helper function that returns deployment status of all deployments
// sorted by time
deploymentStatus = func() []string {
deploys, _, err := jobs.Deployments(*job.ID, false, nil)
Expect(err).ShouldNot(HaveOccurred())
var ret []string
sort.Slice(deploys, func(i, j int) bool {
return deploys[i].CreateIndex < deploys[j].CreateIndex
})
for _, d := range deploys {
ret = append(ret, d.Status)
}
return ret
}
)
BeforeSuite(func() {
conf := api.DefaultConfig()
// Create client
client, err := api.NewClient(conf)
Expect(err).ShouldNot(HaveOccurred())
jobs = client.Jobs()
system = client.System()
})
JustBeforeEach(func() {
job, err = jobspec.ParseFile(specFile)
Expect(err).ShouldNot(HaveOccurred())
job.ID = helper.StringToPtr(uuid.Generate())
resp, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Expect(resp.EvalID).ShouldNot(BeEmpty())
})
AfterEach(func() {
//Deregister job
jobs.Deregister(*job.ID, true, nil)
system.GarbageCollect()
})
Describe("Reschedule Stanza Tests", func() {
Context("No reschedule attempts", func() {
BeforeEach(func() {
specFile = "input/norescheduling.hcl"
})
It("Should have exactly three allocs and all failed", func() {
Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"}))
})
})
Context("System jobs should never be rescheduled", func() {
BeforeEach(func() {
specFile = "input/rescheduling_system.hcl"
})
It("Should have exactly one failed alloc", func() {
Eventually(allocStatuses, 10*time.Second, time.Second).Should(ConsistOf([]string{"failed"}))
})
})
Context("Default Rescheduling", func() {
BeforeEach(func() {
specFile = "input/rescheduling_default.hcl"
})
It("Should have exactly three allocs and all failed after 5 secs", func() {
Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"}))
})
// wait until first exponential delay kicks in and rescheduling is attempted
It("Should have exactly six allocs and all failed after 35 secs", func() {
if !*slow {
Skip("Skipping slow test")
}
Eventually(allocStatuses, 35*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed", "failed", "failed", "failed"}))
})
})
Context("Reschedule attempts maxed out", func() {
BeforeEach(func() {
specFile = "input/rescheduling_fail.hcl"
})
It("Should have all failed", func() {
Eventually(allocStatuses, 6*time.Second, time.Second).ShouldNot(
SatisfyAll(ContainElement("pending"),
ContainElement("running")))
})
Context("Updating job to change its version", func() {
It("Should have running allocs now", func() {
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatuses, 5*time.Second, time.Second).Should(ContainElement("running"))
})
})
})
Context("Reschedule attempts succeeded", func() {
BeforeEach(func() {
specFile = "input/reschedule_success.hcl"
})
It("Should have some running allocs", func() {
Eventually(allocStatuses, 6*time.Second, time.Second).Should(
ContainElement("running"))
})
})
Context("Reschedule with update stanza", func() {
BeforeEach(func() {
specFile = "input/rescheduling_update.hcl"
})
It("Should have all running allocs", func() {
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"running", "running", "running"}))
})
Context("Updating job to make allocs fail", func() {
It("Should have rescheduled allocs until progress deadline", func() {
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatusesRescheduled, 5*time.Second, time.Second).ShouldNot(BeEmpty())
})
})
})
Context("Reschedule with canary", func() {
BeforeEach(func() {
specFile = "input/rescheduling_canary.hcl"
})
It("Should have running allocs and successful deployment", func() {
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"running", "running", "running"}))
time.Sleep(2 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusSuccessful))
})
Context("Updating job to make allocs fail", func() {
It("Should have rescheduled allocs until progress deadline", func() {
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatusesRescheduled, 5*time.Second, time.Second).ShouldNot(BeEmpty())
// Verify new deployment and its status
// Deployment status should be running (because of progress deadline)
time.Sleep(3 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusRunning))
})
})
})
Context("Reschedule with canary, auto revert with short progress deadline ", func() {
BeforeEach(func() {
specFile = "input/rescheduling_canary_autorevert.hcl"
})
It("Should have running allocs and successful deployment", func() {
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"running", "running", "running"}))
time.Sleep(2 * time.Second)
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusSuccessful))
// Make an update that causes the job to fail
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())
// Wait for the revert
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"}))
// Verify new deployment and its status
// There should be one successful, one failed, and one more successful (after revert)
time.Sleep(5 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs
Eventually(deploymentStatus(), 5*time.Second, time.Second).Should(
ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
})
})
Context("Reschedule with max parallel/auto_revert false", func() {
BeforeEach(func() {
specFile = "input/rescheduling_maxp.hcl"
})
It("Should have running allocs and successful deployment", func() {
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"running", "running", "running"}))
time.Sleep(2 * time.Second)
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusSuccessful))
})
Context("Updating job to make allocs fail", func() {
It("Should have rescheduled allocs till progress deadline", func() {
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatusesRescheduled, 6*time.Second, time.Second).ShouldNot(BeEmpty())
// Should have failed allocs including rescheduled failed allocs
Eventually(allocStatuses, 6*time.Second, time.Second).Should(
ConsistOf([]string{"complete", "failed", "failed", "running", "running"}))
// Verify new deployment and its status
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusRunning))
})
})
})
Context("Reschedule with max parallel, auto revert true and short progress deadline", func() {
BeforeEach(func() {
specFile = "input/rescheduling_maxp_autorevert.hcl"
})
It("Should have running allocs and successful deployment", func() {
Eventually(allocStatuses, 3*time.Second, time.Second).Should(
ConsistOf([]string{"running", "running", "running"}))
time.Sleep(4 * time.Second)
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusSuccessful))
// Make an update that causes the job to fail
job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
_, _, err := jobs.Register(job, nil)
Expect(err).ShouldNot(HaveOccurred())
Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())
// Wait for the revert
Eventually(allocStatuses, 5*time.Second, time.Second).Should(
ConsistOf([]string{"complete", "failed", "running", "running", "running"}))
// Verify new deployment and its status
// There should be one successful, one failed, and one more successful (after revert)
time.Sleep(5 * time.Second)
Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
})
})
Context("Reschedule with progress deadline", func() {
BeforeEach(func() {
specFile = "input/rescheduling_progressdeadline.hcl"
})
It("Should have running allocs and successful deployment", func() {
if !*slow {
Skip("Skipping slow test")
}
// Deployment should succeed eventually
time.Sleep(20 * time.Second)
Eventually(deploymentStatus(), 5*time.Second, time.Second).Should(
ContainElement(structs.DeploymentStatusSuccessful))
})
})
})
})