The E2E test suite for rescheduling had a few bugs:

* Using the command line to stop a job with a failing deployment returns a
  non-zero exit code, which would cause an otherwise passing test to fail.
* Two of the input jobs were actually invalid, but were only correctly
  detected as such because of #17342.

This changeset also updates the whole test suite to move it off the v1
"framework". A few test assertions are also de-flaked.

Fixes: #19076

Co-authored-by: Tim Gross <tgross@hashicorp.com>
parent 5f5ed4161e
commit 7057c0c886
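The first bullet in the commit message above is why cleanup in the rewritten suite deliberately ignores the exit status of `nomad job stop`. A minimal sketch of that pattern, using the `e2eutil` and `test` helpers that appear in the diff below (a sketch, not a verbatim excerpt):

```go
package rescheduling

import (
    "testing"

    "github.com/hashicorp/nomad/e2e/e2eutil"
    "github.com/shoenig/test"
)

// cleanup stops and purges a job after the test. StopJob shells out to
// `nomad job stop`, which exits non-zero when the job's deployment has
// already failed, so that error is intentionally not asserted on.
func cleanup(t *testing.T, jobID string) {
    t.Helper()
    t.Cleanup(func() {
        _ = e2eutil.StopJob(jobID, "-purge", "-detach") // may "fail"; expected
        _, err := e2eutil.Command("nomad", "system", "gc")
        test.NoError(t, err)
    })
}
```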
@@ -31,7 +31,6 @@ import (
 	_ "github.com/hashicorp/nomad/e2e/podman"
 	_ "github.com/hashicorp/nomad/e2e/quotas"
 	_ "github.com/hashicorp/nomad/e2e/remotetasks"
-	_ "github.com/hashicorp/nomad/e2e/rescheduling"
 	_ "github.com/hashicorp/nomad/e2e/scaling"
 	_ "github.com/hashicorp/nomad/e2e/scalingpolicies"
 	_ "github.com/hashicorp/nomad/e2e/scheduler_sysbatch"
@@ -45,6 +44,7 @@ import (
 	_ "github.com/hashicorp/nomad/e2e/disconnectedclients"
 	_ "github.com/hashicorp/nomad/e2e/namespaces"
 	_ "github.com/hashicorp/nomad/e2e/nodedrain"
+	_ "github.com/hashicorp/nomad/e2e/rescheduling"
 	_ "github.com/hashicorp/nomad/e2e/volumes"
 )
 
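The two hunks above move the rescheduling suite's blank import from the group of v1 framework suites into the group of standalone test packages. A minimal sketch of why the import's position matters (hypothetical file; the import path is real and taken from the diff):

```go
package e2e

// A blank import exists only for its side effects: importing a package
// runs its init() functions. Under the v1 framework, init() registered
// the rescheduling suite via framework.AddSuites; after the rewrite there
// is nothing to register, so the import only keeps the package built
// alongside the rest of the e2e tree (hence the doc.go placeholder added
// in the next hunk, since a package of only _test.go files can't be
// imported).
import _ "github.com/hashicorp/nomad/e2e/rescheduling"
```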
@@ -0,0 +1,7 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package rescheduling
+
+// This package contains only tests, so this is a placeholder file to
+// make sure builds don't fail with "no non-test Go files in" errors
@@ -30,7 +30,7 @@ job "test" {
       min_healthy_time  = "1s"
       auto_revert       = true
       healthy_deadline  = "2s"
-      progress_deadline = "3s"
+      progress_deadline = "5s"
     }
 
     restart {
@@ -29,7 +29,7 @@ job "demo3" {
       min_healthy_time  = "1s"
       auto_revert       = true
       healthy_deadline  = "2s"
-      progress_deadline = "3s"
+      progress_deadline = "5s"
     }
 
     restart {
@@ -27,7 +27,7 @@ job "demo2" {
     update {
       # we want the first allocation to take a while before we give up on it,
       # so that we can check the deployment's progress deadline before and
-      # after it becomes healthy
+      # after we determine it will never become healthy
       min_healthy_time  = "10s"
       healthy_deadline  = "15s"
       progress_deadline = "20s"
@@ -42,7 +42,7 @@ job "demo2" {
     }
 
     reschedule {
-      unlimited      = "true"
+      unlimited      = true
       delay_function = "constant"
       delay          = "5s"
     }
@@ -31,9 +31,10 @@ job "test3" {
     }
 
     reschedule {
-      attempts       = 2
-      interval       = "5m"
-      unlimited      = false
+      delay          = "5s"
+      delay_function = "constant"
+      unlimited      = true
     }
+
   }
 }
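Invalid input jobs like the ones mentioned in the commit message's second bullet only surface when something validates them. One way to catch them earlier is to ask a running Nomad agent to validate a parsed jobspec without registering it. A hedged sketch, assuming the `hashicorp/nomad/api` client and the `jobspec` parser already used in this diff; the file path is illustrative:

```go
package main

import (
    "fmt"
    "log"

    "github.com/hashicorp/nomad/api"
    "github.com/hashicorp/nomad/jobspec"
)

func main() {
    // Parse one of the e2e input specs (illustrative path).
    job, err := jobspec.ParseFile("./input/rescheduling_fail.nomad")
    if err != nil {
        log.Fatal(err)
    }

    // Ask the Nomad agent to validate the job without registering it.
    client, err := api.NewClient(api.DefaultConfig())
    if err != nil {
        log.Fatal(err)
    }
    resp, _, err := client.Jobs().Validate(job, nil)
    if err != nil {
        log.Fatal(err)
    }
    for _, verr := range resp.ValidationErrors {
        fmt.Println("validation error:", verr)
    }
}
```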
@@ -1,500 +0,0 @@
-// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: MPL-2.0
-
-package rescheduling
-
-import (
-    "fmt"
-    "os"
-    "reflect"
-    "sort"
-    "time"
-
-    e2e "github.com/hashicorp/nomad/e2e/e2eutil"
-    "github.com/hashicorp/nomad/e2e/framework"
-    "github.com/hashicorp/nomad/helper/uuid"
-    "github.com/hashicorp/nomad/jobspec"
-    "github.com/hashicorp/nomad/testutil"
-)
-
-const ns = ""
-
-type RescheduleE2ETest struct {
-    framework.TC
-    jobIds []string
-}
-
-func init() {
-    framework.AddSuites(&framework.TestSuite{
-        Component:   "Rescheduling",
-        CanRunLocal: true,
-        Consul:      true,
-        Cases: []framework.TestCase{
-            new(RescheduleE2ETest),
-        },
-    })
-
-}
-
-func (tc *RescheduleE2ETest) BeforeAll(f *framework.F) {
-    e2e.WaitForLeader(f.T(), tc.Nomad())
-    e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
-}
-
-func (tc *RescheduleE2ETest) AfterEach(f *framework.F) {
-    if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
-        return
-    }
-
-    for _, id := range tc.jobIds {
-        err := e2e.StopJob(id, "-purge")
-        f.Assert().NoError(err)
-    }
-    tc.jobIds = []string{}
-    _, err := e2e.Command("nomad", "system", "gc")
-    f.Assert().NoError(err)
-}
-
-// TestNoReschedule runs a job that should fail and never reschedule
-func (tc *RescheduleE2ETest) TestNoReschedule(f *framework.F) {
-    jobID := "test-no-reschedule-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/norescheduling.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"failed", "failed", "failed"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 failed allocs",
-    )
-}
-
-// TestNoRescheduleSystem runs a system job that should fail and never reschedule
-func (tc *RescheduleE2ETest) TestNoRescheduleSystem(f *framework.F) {
-    jobID := "test-reschedule-system-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_system.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                for _, status := range got {
-                    if status != "failed" {
-                        return false
-                    }
-                }
-                return true
-            }, nil,
-        ),
-        "should have only failed allocs",
-    )
-}
-
-// TestDefaultReschedule runs a job that should reschedule after delay
-func (tc *RescheduleE2ETest) TestDefaultReschedule(f *framework.F) {
-
-    jobID := "test-default-reschedule-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_default.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"failed", "failed", "failed"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 failed allocs",
-    )
-
-    // TODO(tgross): return early if "slow" isn't set
-    // wait until first exponential delay kicks in and rescheduling is attempted
-    time.Sleep(time.Second * 35)
-    expected = []string{"failed", "failed", "failed", "failed", "failed", "failed"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 6 failed allocs after 35s",
-    )
-}
-
-// TestRescheduleMaxAttempts runs a job with a maximum reschedule attempts
-func (tc *RescheduleE2ETest) TestRescheduleMaxAttempts(f *framework.F) {
-
-    jobID := "test-reschedule-fail-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_fail.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"failed", "failed", "failed"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 failed allocs",
-    )
-
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_fail.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not register updated job")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                for _, status := range got {
-                    if status == "running" {
-                        return true
-                    }
-                }
-                return false
-            }, nil,
-        ),
-        "should have at least 1 running alloc",
-    )
-}
-
-// TestRescheduleSuccess runs a job that should be running after rescheduling
-func (tc *RescheduleE2ETest) TestRescheduleSuccess(f *framework.F) {
-
-    jobID := "test-reschedule-success-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_success.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                for _, status := range got {
-                    if status == "running" {
-                        return true
-                    }
-                }
-                return false
-            }, nil,
-        ),
-        "should have at least 1 running alloc",
-    )
-}
-
-// TestRescheduleWithUpdate updates a running job to fail, and verifies that
-// it gets rescheduled
-func (tc *RescheduleE2ETest) TestRescheduleWithUpdate(f *framework.F) {
-
-    jobID := "test-reschedule-update-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_update.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running allocs",
-    )
-
-    // reschedule to make fail
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_update.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not register updated job")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID, ns) },
-            func(got []string) bool { return len(got) > 0 }, nil,
-        ),
-        "should have rescheduled allocs until progress deadline",
-    )
-}
-
-// TestRescheduleWithCanary updates a running job to fail, and verify that the
-// canary gets rescheduled
-func (tc *RescheduleE2ETest) TestRescheduleWithCanary(f *framework.F) {
-
-    jobID := "test-reschedule-canary-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-
-    // reschedule to make fail
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not register updated job")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID, ns) },
-            func(got []string) bool { return len(got) > 0 }, nil,
-        ),
-        "should have rescheduled allocs until progress deadline",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "running", nil),
-        "deployment should be running")
-}
-
-// TestRescheduleWithCanaryAutoRevert updates a running job to fail, and
-// verifies that the job gets reverted.
-func (tc *RescheduleE2ETest) TestRescheduleWithCanaryAutoRevert(f *framework.F) {
-
-    jobID := "test-reschedule-canary-revert-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_canary_autorevert.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-
-    // reschedule to make fail
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_canary_autorevert.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not register updated job")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID, ns) },
-            func(got []string) bool { return len(got) > 0 }, nil,
-        ),
-        "should have new allocs after update",
-    )
-
-    // then we'll fail and revert
-    expected = []string{"failed", "failed", "failed", "running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running reverted allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-}
-
-// TestRescheduleMaxParallel updates a job with a max_parallel config
-func (tc *RescheduleE2ETest) TestRescheduleMaxParallel(f *framework.F) {
-
-    jobID := "test-reschedule-maxp-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-
-    // reschedule to make fail
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not register updated job")
-
-    expected = []string{"complete", "failed", "failed", "running", "running"}
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                sort.Strings(got)
-                return reflect.DeepEqual(got, expected)
-            }, nil,
-        ),
-        "should have failed allocs including rescheduled failed allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "running", nil),
-        "deployment should be running")
-}
-
-// TestRescheduleMaxParallelAutoRevert updates a job with a max_parallel
-// config that will autorevert on failure
-func (tc *RescheduleE2ETest) TestRescheduleMaxParallelAutoRevert(f *framework.F) {
-
-    jobID := "test-reschedule-maxp-revert-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_maxp_autorevert.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have exactly 3 running allocs",
-    )
-
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-
-    // reschedule to make fail
-    job, err := jobspec.ParseFile("rescheduling/input/rescheduling_maxp_autorevert.nomad")
-    f.NoError(err)
-    job.ID = &jobID
-    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
-    _, _, err = tc.Nomad().Jobs().Register(job, nil)
-    f.NoError(err, "could not e2e.Register updated job")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatusesRescheduled(jobID, ns) },
-            func(got []string) bool { return len(got) > 0 }, nil,
-        ),
-        "should have new allocs after update",
-    )
-
-    // wait for the revert
-    expected = []string{"complete", "failed", "running", "running", "running"}
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                sort.Strings(got)
-                return reflect.DeepEqual(got, expected)
-            }, nil,
-        ),
-        "should have one successful, one failed, and 3 reverted allocs",
-    )
-
-    // at this point the allocs have been checked but we need to wait for the
-    // deployment to be marked complete before we can assert that it's successful
-    // and verify the count of deployments
-    f.NoError(
-        e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "most recent deployment should be successful")
-
-    out, err := e2e.Command("nomad", "deployment", "status")
-    f.NoError(err, "could not get deployment status")
-
-    results, err := e2e.ParseColumns(out)
-    f.NoError(err, "could not parse deployment status")
-    statuses := map[string]int{}
-    for _, row := range results {
-        if row["Job ID"] == jobID {
-            statuses[row["Status"]]++
-        }
-    }
-
-    f.Equal(1, statuses["failed"],
-        fmt.Sprintf("expected only 1 failed deployment, got:\n%s", out))
-    f.Equal(2, statuses["successful"],
-        fmt.Sprintf("expected 2 successful deployments, got:\n%s", out))
-}
-
-// TestRescheduleProgressDeadline verifies the progress deadline is reset with
-// each healthy allocation, and that a rescheduled allocation does not.
-func (tc *RescheduleE2ETest) TestRescheduleProgressDeadline(f *framework.F) {
-
-    jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    expected := []string{"running"}
-    f.NoError(
-        e2e.WaitForAllocStatusExpected(jobID, ns, expected),
-        "should have a running allocation",
-    )
-
-    deploymentID, err := e2e.LastDeploymentID(jobID, ns)
-    f.NoError(err, "couldn't look up deployment")
-
-    oldDeadline, err := getProgressDeadline(deploymentID)
-    f.NoError(err, "could not get progress deadline")
-    time.Sleep(time.Second * 20)
-
-    newDeadline, err := getProgressDeadline(deploymentID)
-    f.NoError(err, "could not get new progress deadline")
-    f.NotEqual(oldDeadline, newDeadline, "progress deadline should have been updated")
-
-    f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
-        "deployment should be successful")
-}
-
-// TestRescheduleProgressDeadlineFail verifies the progress deadline is reset with
-// each healthy allocation, and that a rescheduled allocation does not.
-func (tc *RescheduleE2ETest) TestRescheduleProgressDeadlineFail(f *framework.F) {
-
-    jobID := "test-reschedule-deadline-fail" + uuid.Generate()[0:8]
-    f.NoError(e2e.Register(jobID, "rescheduling/input/rescheduling_progressdeadline_fail.nomad"))
-    tc.jobIds = append(tc.jobIds, jobID)
-
-    testutil.WaitForResult(func() (bool, error) {
-        _, err := e2e.LastDeploymentID(jobID, ns)
-        return err == nil, err
-    }, func(err error) {
-        f.NoError(err, "deployment wasn't created yet")
-    })
-
-    deploymentID, err := e2e.LastDeploymentID(jobID, ns)
-    f.NoError(err, "couldn't look up deployment")
-
-    oldDeadline, err := getProgressDeadline(deploymentID)
-    f.NoError(err, "could not get progress deadline")
-    time.Sleep(time.Second * 20)
-
-    f.NoError(e2e.WaitForLastDeploymentStatus(jobID, ns, "failed", nil),
-        "deployment should be failed")
-
-    f.NoError(
-        e2e.WaitForAllocStatusComparison(
-            func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
-            func(got []string) bool {
-                for _, status := range got {
-                    if status != "failed" {
-                        return false
-                    }
-                }
-                return true
-            }, nil,
-        ),
-        "should have only failed allocs",
-    )
-
-    newDeadline, err := getProgressDeadline(deploymentID)
-    f.NoError(err, "could not get new progress deadline")
-    f.Equal(oldDeadline, newDeadline, "progress deadline should not have been updated")
-}
-
-func getProgressDeadline(deploymentID string) (time.Time, error) {
-
-    out, err := e2e.Command("nomad", "deployment", "status", deploymentID)
-    if err != nil {
-        return time.Time{}, fmt.Errorf("could not get deployment status: %v\n%v", err, out)
-    }
-
-    section, err := e2e.GetSection(out, "Deployed")
-    if err != nil {
-        return time.Time{}, fmt.Errorf("could not find Deployed section: %w", err)
-    }
-
-    rows, err := e2e.ParseColumns(section)
-    if err != nil {
-        return time.Time{}, fmt.Errorf("could not parse Deployed section: %w", err)
-    }
-
-    layout := "2006-01-02T15:04:05Z07:00" // taken from command/helpers.go
-    raw := rows[0]["Progress Deadline"]
-    return time.Parse(layout, raw)
-}
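The 500-line file above is the v1 framework version being deleted; the file below replaces it with plain Go tests. A rough before/after sketch of what the migration amounts to, with a hypothetical test name (helpers like `cleanupJob` come from the new file below):

```go
package rescheduling

import "testing"

// Before: a struct embedding framework.TC, registered in init() via
// framework.AddSuites, with BeforeAll/AfterEach hooks shared by every
// test method.
//
// After: each case is an ordinary Test* function. Setup happens inline
// and cleanup is registered with t.Cleanup, so a single test can be run
// in isolation with `go test -run`.
func TestRescheduling_Shape(t *testing.T) { // hypothetical example test
    jobID := "example-" + "00000000" // per-test unique ID, as in the new file
    cleanupJob(t, jobID)             // hooks stop/purge + gc via t.Cleanup
    // ...register the job, then poll with must.Wait(wait.InitialSuccess(...))...
}
```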
@@ -0,0 +1,509 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package rescheduling
+
+import (
+    "os"
+    "reflect"
+    "sort"
+    "testing"
+    "time"
+
+    "github.com/hashicorp/nomad/e2e/e2eutil"
+    "github.com/hashicorp/nomad/helper/uuid"
+    "github.com/hashicorp/nomad/jobspec"
+    "github.com/shoenig/test"
+    "github.com/shoenig/test/must"
+    "github.com/shoenig/test/wait"
+)
+
+const ns = "default"
+
+func cleanupJob(t *testing.T, jobID string) {
+    if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
+        return
+    }
+
+    t.Helper()
+    t.Cleanup(func() {
+        e2eutil.StopJob(jobID, "-purge", "-detach")
+        _, err := e2eutil.Command("nomad", "system", "gc")
+        test.NoError(t, err)
+    })
+}
+
+// Note: most of the StopJob calls in this test suite will return an
+// error because the job has previously failed and we're not waiting for
+// the deployment to end
+
+// TestRescheduling_Service_NoReschedule runs a service job that should fail and never
+// reschedule
+func TestRescheduling_Service_NoReschedule(t *testing.T) {
+    jobID := "test-no-reschedule-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/norescheduling_service.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"failed", "failed", "failed"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 failed allocs"),
+    )
+}
+
+// TestRescheduling_System_NoReschedule runs a system job that should fail and never
+// reschedule
+func TestRescheduling_System_NoReschedule(t *testing.T) {
+    jobID := "test-no-reschedule-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/norescheduling_system.nomad"))
+
+    cleanupJob(t, jobID)
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatuses(jobID, ns) },
+            func(got []string) bool {
+                for _, status := range got {
+                    if status != "failed" {
+                        return false
+                    }
+                }
+                return true
+            }, nil,
+        ),
+        must.Sprint("should have only failed allocs"),
+    )
+}
+
+// TestRescheduling_Default runs a job that should reschedule after delay
+func TestRescheduling_Default(t *testing.T) {
+    jobID := "test-default-reschedule-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_default.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"failed", "failed", "failed"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 failed allocs"),
+    )
+
+    // wait until first exponential delay kicks in and rescheduling is attempted
+    time.Sleep(time.Second * 35)
+    expected = []string{"failed", "failed", "failed", "failed", "failed", "failed"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 6 failed allocs after 35s"),
+    )
+}
+
+// TestRescheduling_MaxAttempts runs a job with a maximum reschedule attempts
+func TestRescheduling_MaxAttempts(t *testing.T) {
+
+    jobID := "test-reschedule-fail-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_fail.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"failed", "failed", "failed"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 failed allocs"),
+    )
+
+    job, err := jobspec.ParseFile("./input/rescheduling_fail.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            got, err := e2eutil.AllocStatuses(jobID, ns)
+            must.NoError(t, err)
+            for _, status := range got {
+                if status == "running" {
+                    return true
+                }
+            }
+            return false
+        }),
+        wait.Timeout(10*time.Second),
+        wait.Gap(500*time.Millisecond),
+    ), must.Sprint("should have at least 1 running alloc"))
+}
+
+// TestRescheduling_Success runs a job that should be running after rescheduling
+func TestRescheduling_Success(t *testing.T) {
+
+    jobID := "test-reschedule-success-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_success.nomad"))
+
+    cleanupJob(t, jobID)
+
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            got, err := e2eutil.AllocStatuses(jobID, ns)
+            must.NoError(t, err)
+            running := 0
+            for _, status := range got {
+                if status == "running" {
+                    running++
+                }
+            }
+            return running == 3
+        }),
+        wait.Timeout(60*time.Second), // this can take a while!
+        wait.Gap(500*time.Millisecond),
+    ), must.Sprint("all 3 allocs should eventually be running"))
+}
+
+// TestRescheduling_WithUpdate updates a running job to fail, and verifies that
+// it gets rescheduled
+func TestRescheduling_WithUpdate(t *testing.T) {
+
+    jobID := "test-reschedule-update-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_update.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running allocs"),
+    )
+
+    // reschedule to make fail
+    job, err := jobspec.ParseFile("./input/rescheduling_update.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatusesRescheduled(jobID, ns) },
+            func(got []string) bool { return len(got) > 0 }, nil,
+        ),
+        must.Sprint("should have rescheduled allocs until progress deadline"),
+    )
+}
+
+// TestRescheduling_WithCanary updates a running job to fail, and verifies that
+// the canary gets rescheduled
+func TestRescheduling_WithCanary(t *testing.T) {
+
+    jobID := "test-reschedule-canary-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_canary.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("deployment should be successful"))
+
+    // reschedule to make fail
+    job, err := jobspec.ParseFile("./input/rescheduling_canary.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatusesRescheduled(jobID, ns) },
+            func(got []string) bool { return len(got) > 0 }, nil,
+        ),
+        must.Sprint("should have rescheduled allocs until progress deadline"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "running", nil),
+        must.Sprint("deployment should be running"))
+}
+
+// TestRescheduling_WithCanaryAutoRevert updates a running job to fail, and
+// verifies that the job gets reverted.
+func TestRescheduling_WithCanaryAutoRevert(t *testing.T) {
+
+    jobID := "test-reschedule-canary-revert-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_canary_autorevert.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("deployment should be successful"))
+
+    // reschedule to make fail
+    job, err := jobspec.ParseFile("./input/rescheduling_canary_autorevert.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatusesRescheduled(jobID, ns) },
+            func(got []string) bool { return len(got) > 0 }, nil,
+        ),
+        must.Sprint("should have new allocs after update"),
+    )
+
+    // then we'll fail and revert
+    expected = []string{"failed", "failed", "failed", "running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running reverted allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("deployment should be successful"))
+}
+
+// TestRescheduling_MaxParallel updates a job with a max_parallel config
+func TestRescheduling_MaxParallel(t *testing.T) {
+
+    jobID := "test-reschedule-maxp-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_maxp.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("deployment should be successful"))
+
+    // reschedule to make fail
+    job, err := jobspec.ParseFile("./input/rescheduling_maxp.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    expected = []string{"complete", "failed", "failed", "running", "running"}
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatuses(jobID, ns) },
+            func(got []string) bool {
+                sort.Strings(got)
+                return reflect.DeepEqual(got, expected)
+            }, nil,
+        ),
+        must.Sprint("should have failed allocs including rescheduled failed allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "running", nil),
+        must.Sprint("deployment should be running"))
+}
+
+// TestRescheduling_MaxParallelAutoRevert updates a job with a max_parallel
+// config that will autorevert on failure
+func TestRescheduling_MaxParallelAutoRevert(t *testing.T) {
+
+    jobID := "test-reschedule-maxp-revert-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_maxp_autorevert.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have exactly 3 running allocs"),
+    )
+
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("deployment should be successful"))
+
+    // reschedule to make fail
+    job, err := jobspec.ParseFile("./input/rescheduling_maxp_autorevert.nomad")
+    must.NoError(t, err)
+    job.ID = &jobID
+    job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
+
+    nc := e2eutil.NomadClient(t)
+    _, _, err = nc.Jobs().Register(job, nil)
+    must.NoError(t, err, must.Sprint("could not register updated job"))
+
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatusesRescheduled(jobID, ns) },
+            func(got []string) bool { return len(got) > 0 }, nil,
+        ),
+        must.Sprint("should have new allocs after update"),
+    )
+
+    // wait for the revert
+    expected = []string{"complete", "failed", "running", "running", "running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusComparison(
+            func() ([]string, error) { return e2eutil.AllocStatuses(jobID, ns) },
+            func(got []string) bool {
+                sort.Strings(got)
+                return reflect.DeepEqual(got, expected)
+            }, nil,
+        ),
+        must.Sprint("should have one successful, one failed, and 3 reverted allocs"),
+    )
+
+    // at this point the allocs have been checked but we need to wait for the
+    // deployment to be marked complete before we can assert that it's successful
+    // and verify the count of deployments
+    must.NoError(t,
+        e2eutil.WaitForLastDeploymentStatus(jobID, ns, "successful", nil),
+        must.Sprint("most recent deployment should be successful"))
+
+    out, err := e2eutil.Command("nomad", "deployment", "status")
+    must.NoError(t, err, must.Sprint("could not get deployment status"))
+
+    results, err := e2eutil.ParseColumns(out)
+    must.NoError(t, err, must.Sprint("could not parse deployment status"))
+    statuses := map[string]int{}
+    for _, row := range results {
+        if row["Job ID"] == jobID {
+            statuses[row["Status"]]++
+        }
+    }
+
+    must.Eq(t, 1, statuses["failed"],
+        must.Sprintf("expected only 1 failed deployment, got:\n%s", out))
+    must.Eq(t, 2, statuses["successful"],
+        must.Sprintf("expected 2 successful deployments, got:\n%s", out))
+}
+
+// TestRescheduling_ProgressDeadline verifies the progress deadline is only
+// reset with each healthy allocation, not failed one (which we'll then
+// reschedule)
+func TestRescheduling_ProgressDeadline(t *testing.T) {
+
+    jobID := "test-reschedule-deadline-" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_progressdeadline.nomad"))
+
+    cleanupJob(t, jobID)
+
+    expected := []string{"running"}
+    must.NoError(t,
+        e2eutil.WaitForAllocStatusExpected(jobID, ns, expected),
+        must.Sprint("should have a running allocation"),
+    )
+
+    var deploymentID string
+
+    deploymentID, err := e2eutil.LastDeploymentID(jobID, ns)
+    must.NoError(t, err, must.Sprint("couldn't look up deployment"))
+
+    _, oldDeadline := getDeploymentState(t, deploymentID)
+
+    var newStatus string
+    var newDeadline time.Time
+
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            newStatus, newDeadline = getDeploymentState(t, deploymentID)
+            return newStatus == "successful"
+        }),
+        wait.Timeout(30*time.Second),
+        wait.Gap(500*time.Millisecond),
+    ), must.Sprint("deployment should be successful"))
+
+    must.NotEq(t, oldDeadline, newDeadline,
+        must.Sprint("progress deadline should have been updated"))
+}
+
+// TestRescheduling_ProgressDeadlineFail verifies the progress deadline is only
+// reset with each healthy allocation, and this fails the deployment if not
+func TestRescheduling_ProgressDeadlineFail(t *testing.T) {
+
+    jobID := "test-reschedule-deadline-fail" + uuid.Generate()[0:8]
+    must.NoError(t, e2eutil.Register(jobID, "./input/rescheduling_progressdeadline_fail.nomad"))
+
+    cleanupJob(t, jobID)
+
+    var deploymentID string
+
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            deploymentID, _ = e2eutil.LastDeploymentID(jobID, ns)
+            return deploymentID != ""
+        }),
+        wait.Timeout(5*time.Second),
+        wait.Gap(500*time.Millisecond),
+    ), must.Sprint("deployment not created"))
+
+    _, oldDeadline := getDeploymentState(t, deploymentID)
+
+    var newStatus string
+    var newDeadline time.Time
+
+    must.Wait(t, wait.InitialSuccess(
+        wait.BoolFunc(func() bool {
+            newStatus, newDeadline = getDeploymentState(t, deploymentID)
+            return newStatus == "failed"
+        }),
+        wait.Timeout(30*time.Second),
+        wait.Gap(500*time.Millisecond),
+    ), must.Sprint("deployment should be failed"))
+
+    must.Eq(t, oldDeadline, newDeadline,
+        must.Sprint("progress deadline should not have been updated"))
+}
+
+// getDeploymentState returns the status and progress deadline for the given
+// deployment
+func getDeploymentState(t *testing.T, deploymentID string) (string, time.Time) {
+
+    out, err := e2eutil.Command("nomad", "deployment", "status", deploymentID)
+    must.NoError(t, err, must.Sprintf("could not get deployment status from output: %v", out))
+
+    status, err := e2eutil.GetField(out, "Status")
+    must.NoError(t, err, must.Sprintf("could not find Status field in output: %v", out))
+
+    section, err := e2eutil.GetSection(out, "Deployed")
+    must.NoError(t, err, must.Sprintf("could not find Deployed section in output: %v", out))
+
+    rows, err := e2eutil.ParseColumns(section)
+    must.NoError(t, err, must.Sprintf("could not parse Deployed section from output: %v", out))
+
+    layout := "2006-01-02T15:04:05Z07:00" // taken from command/helpers.go
+    raw := rows[0]["Progress Deadline"]
+    deadline, err := time.Parse(layout, raw)
+    must.NoError(t, err, must.Sprint("could not parse Progress Deadline timestamp"))
+    return status, deadline
+}