2022-09-22 20:06:17 +00:00
|
|
|
package overlap
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/api"
|
|
|
|
"github.com/hashicorp/nomad/e2e/e2eutil"
|
|
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
|
|
|
"github.com/hashicorp/nomad/testutil"
|
|
|
|
"github.com/shoenig/test/must"
|
|
|
|
)
|
|
|
|
|
|
|
|
// TestOverlap asserts that the resources used by an allocation are not
|
|
|
|
// considered free until their ClientStatus is terminal.
|
|
|
|
//
|
|
|
|
// See: https://github.com/hashicorp/nomad/issues/10440
|
|
|
|
func TestOverlap(t *testing.T) {
|
|
|
|
nomadClient := e2eutil.NomadClient(t)
|
|
|
|
e2eutil.WaitForLeader(t, nomadClient)
|
|
|
|
|
|
|
|
getJob := func() (*api.Job, string) {
|
|
|
|
job, err := e2eutil.Parse2(t, "testdata/overlap.nomad")
|
|
|
|
must.NoError(t, err)
|
|
|
|
jobID := *job.ID + uuid.Short()
|
|
|
|
job.ID = &jobID
|
|
|
|
return job, *job.ID
|
|
|
|
}
|
|
|
|
job1, jobID1 := getJob()
|
|
|
|
|
2022-10-12 18:21:28 +00:00
|
|
|
// Register initial job that should block subsequent job's placement until
|
|
|
|
// its shutdown_delay is up.
|
2022-09-22 20:06:17 +00:00
|
|
|
_, _, err := nomadClient.Jobs().Register(job1, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
defer e2eutil.WaitForJobStopped(t, nomadClient, jobID1)
|
|
|
|
|
|
|
|
var origAlloc *api.AllocationListStub
|
|
|
|
testutil.Wait(t, func() (bool, error) {
|
2022-10-21 14:53:26 +00:00
|
|
|
time.Sleep(500 * time.Millisecond)
|
2022-10-14 21:15:07 +00:00
|
|
|
|
2022-09-22 20:06:17 +00:00
|
|
|
a, _, err := nomadClient.Jobs().Allocations(jobID1, false, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
if n := len(a); n == 0 {
|
2022-10-14 21:15:07 +00:00
|
|
|
evalOut := e2eutil.DumpEvals(nomadClient, jobID1)
|
|
|
|
return false, fmt.Errorf("timed out before an allocation was found for %s. Evals:\n%s", jobID1, evalOut)
|
2022-09-22 20:06:17 +00:00
|
|
|
}
|
|
|
|
must.Len(t, 1, a)
|
|
|
|
|
|
|
|
origAlloc = a[0]
|
|
|
|
return origAlloc.ClientStatus == "running", fmt.Errorf("timed out before alloc %s for %s was running: %s",
|
|
|
|
origAlloc.ID, jobID1, origAlloc.ClientStatus)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Stop job but don't wait for ClientStatus terminal
|
|
|
|
_, _, err = nomadClient.Jobs().Deregister(jobID1, false, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
minStopTime := time.Now().Add(job1.TaskGroups[0].Tasks[0].ShutdownDelay)
|
|
|
|
|
|
|
|
testutil.Wait(t, func() (bool, error) {
|
|
|
|
a, _, err := nomadClient.Allocations().Info(origAlloc.ID, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
ds, cs := a.DesiredStatus, a.ClientStatus
|
|
|
|
return ds == "stop" && cs == "running", fmt.Errorf("expected alloc %s to be stop|running but found %s|%s",
|
|
|
|
a.ID, ds, cs)
|
|
|
|
})
|
|
|
|
|
2022-10-21 14:53:26 +00:00
|
|
|
// Start replacement job on same node and assert it is blocked because the
|
|
|
|
// static port is already in use.
|
2022-09-22 20:06:17 +00:00
|
|
|
job2, jobID2 := getJob()
|
2022-10-12 18:21:28 +00:00
|
|
|
job2.Constraints = append(job2.Constraints, api.NewConstraint("${node.unique.id}", "=", origAlloc.NodeID))
|
2022-09-22 20:06:17 +00:00
|
|
|
job2.TaskGroups[0].Tasks[0].ShutdownDelay = 0 // no need on the followup
|
2022-10-12 18:21:28 +00:00
|
|
|
|
2022-09-22 20:06:17 +00:00
|
|
|
resp, _, err := nomadClient.Jobs().Register(job2, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
defer e2eutil.WaitForJobStopped(t, nomadClient, jobID2)
|
|
|
|
|
|
|
|
testutil.Wait(t, func() (bool, error) {
|
|
|
|
e, _, err := nomadClient.Evaluations().Info(resp.EvalID, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
if e == nil {
|
|
|
|
return false, fmt.Errorf("eval %s does not exist yet", resp.EvalID)
|
|
|
|
}
|
|
|
|
return e.BlockedEval != "", fmt.Errorf("expected a blocked eval to be created but found: %#v", *e)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Wait for job1's ShutdownDelay for origAlloc.ClientStatus to go terminal
|
|
|
|
sleepyTime := minStopTime.Sub(time.Now())
|
|
|
|
if sleepyTime > 0 {
|
2022-10-12 18:21:28 +00:00
|
|
|
t.Logf("Followup job %s blocked. Sleeping for the rest of %s's shutdown_delay (%.3s/%s)",
|
|
|
|
*job2.ID, *job1.ID, sleepyTime, job1.TaskGroups[0].Tasks[0].ShutdownDelay)
|
2022-09-22 20:06:17 +00:00
|
|
|
time.Sleep(sleepyTime)
|
|
|
|
}
|
|
|
|
|
|
|
|
testutil.Wait(t, func() (bool, error) {
|
|
|
|
a, _, err := nomadClient.Allocations().Info(origAlloc.ID, nil)
|
|
|
|
must.NoError(t, err)
|
|
|
|
return a.ClientStatus == "complete", fmt.Errorf("expected original alloc %s to be complete but is %s",
|
|
|
|
a.ID, a.ClientStatus)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Assert replacement job unblocked and running
|
|
|
|
testutil.Wait(t, func() (bool, error) {
|
2022-10-21 14:53:26 +00:00
|
|
|
time.Sleep(500 * time.Millisecond)
|
2022-10-18 20:02:18 +00:00
|
|
|
|
2022-10-21 14:53:26 +00:00
|
|
|
a, _, err := nomadClient.Jobs().Allocations(jobID2, true, nil)
|
2022-09-22 20:06:17 +00:00
|
|
|
must.NoError(t, err)
|
|
|
|
if n := len(a); n == 0 {
|
2022-10-18 20:02:18 +00:00
|
|
|
evalOut := e2eutil.DumpEvals(nomadClient, jobID2)
|
|
|
|
return false, fmt.Errorf("timed out before an allocation was found for %s; Evals:\n%s", jobID2, evalOut)
|
2022-09-22 20:06:17 +00:00
|
|
|
}
|
|
|
|
must.Len(t, 1, a)
|
|
|
|
|
|
|
|
return a[0].ClientStatus == "running", fmt.Errorf("timed out before alloc %s for %s was running: %s",
|
|
|
|
a[0].ID, jobID2, a[0].ClientStatus)
|
|
|
|
})
|
|
|
|
}
|