test: simplify overlap job placement logic (#14811)

* test: simplify overlap job placement logic

Trying to fix #14806

Both the previous approach and this one worked on e2e clusters I spun up.

* simplify code flow
Michael Schurter 2022-10-12 11:21:28 -07:00 committed by GitHub
parent bcd26f8815
commit bdb639b3e2
2 changed files with 15 additions and 45 deletions
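Before the diff, a rough sketch of the flow this change moves to: the first job is registered with no node targeting, the test reads the node ID back off the allocation the scheduler placed, and only the follow-up job gets pinned to that node and sized to its full available CPU. The sketch below reuses the github.com/hashicorp/nomad/api calls visible in the diff; the package name, the placeFollowup helper, and the omitted polling and assertions are illustrative only, not part of the actual test.

// Sketch only: mirrors the placement logic in the diff, with setup and
// retries stripped out. placeFollowup is a hypothetical helper name.
package overlap

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func placeFollowup(client *api.Client, job1, job2 *api.Job) error {
	// Register job1 unconstrained; the scheduler picks any eligible node
	// (the jobspec still constrains it to Linux nodes).
	if _, _, err := client.Jobs().Register(job1, nil); err != nil {
		return err
	}

	// Look up the allocation that was placed for job1. The real test waits
	// for it to be running; that polling is omitted here.
	allocs, _, err := client.Jobs().Allocations(*job1.ID, false, nil)
	if err != nil {
		return err
	}
	if len(allocs) == 0 {
		return fmt.Errorf("no allocation placed for %s yet", *job1.ID)
	}
	origAlloc := allocs[0]

	// Capture the node that actually ran job1.
	node, _, err := client.Nodes().Info(origAlloc.NodeID, nil)
	if err != nil {
		return err
	}

	// Pin job2 to that node and size it to the node's entire unreserved CPU,
	// so it cannot be placed until job1's resources are released.
	job2.Constraints = append(job2.Constraints,
		api.NewConstraint("${node.unique.id}", "=", origAlloc.NodeID))
	availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
	job2.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU

	_, _, err = client.Jobs().Register(job2, nil)
	return err
}

Letting the scheduler pick the node first means the test no longer has to re-implement feasibility checks (ready, eligible, Linux) before registering anything, which is where most of the deleted lines below came from.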


@@ -20,49 +20,17 @@ func TestOverlap(t *testing.T) {
 	nomadClient := e2eutil.NomadClient(t)
 	e2eutil.WaitForLeader(t, nomadClient)
-	// Wait for at least 1 feasible node to be ready and get its ID
-	var node *api.Node
-	testutil.Wait(t, func() (bool, error) {
-		nodesList, _, err := nomadClient.Nodes().List(nil)
-		if err != nil {
-			return false, fmt.Errorf("error listing nodes: %v", err)
-		}
-		for _, n := range nodesList {
-			if n.Status != "ready" {
-				continue
-			}
-			if n.SchedulingEligibility != "eligible" {
-				continue
-			}
-			node, _, err = nomadClient.Nodes().Info(n.ID, nil)
-			must.NoError(t, err)
-			if node.Attributes["kernel.name"] != "linux" {
-				continue
-			}
-			return true, nil
-		}
-		return false, fmt.Errorf("no nodes ready before timeout; need at least 1 ready")
-	})
-	// Force job to fill one exact node
 	getJob := func() (*api.Job, string) {
 		job, err := e2eutil.Parse2(t, "testdata/overlap.nomad")
 		must.NoError(t, err)
 		jobID := *job.ID + uuid.Short()
 		job.ID = &jobID
-		job.Datacenters = []string{node.Datacenter}
-		job.Constraints[1].RTarget = node.ID
-		availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
-		job.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU
 		return job, *job.ID
 	}
 	job1, jobID1 := getJob()
 	// Register initial job that should block subsequent job's placement until
 	// its shutdown_delay is up.
 	_, _, err := nomadClient.Jobs().Register(job1, nil)
 	must.NoError(t, err)
 	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID1)
@@ -81,6 +49,10 @@ func TestOverlap(t *testing.T) {
 			origAlloc.ID, jobID1, origAlloc.ClientStatus)
 	})
+	// Capture node so we can ensure 2nd job is blocked by first
+	node, _, err := nomadClient.Nodes().Info(origAlloc.NodeID, nil)
+	must.NoError(t, err)
 	// Stop job but don't wait for ClientStatus terminal
 	_, _, err = nomadClient.Jobs().Deregister(jobID1, false, nil)
 	must.NoError(t, err)
@@ -94,9 +66,13 @@ func TestOverlap(t *testing.T) {
 			a.ID, ds, cs)
 	})
-	// Start replacement job and assert it is blocked
+	// Start replacement job on same node and assert it is blocked
 	job2, jobID2 := getJob()
+	job2.Constraints = append(job2.Constraints, api.NewConstraint("${node.unique.id}", "=", origAlloc.NodeID))
 	job2.TaskGroups[0].Tasks[0].ShutdownDelay = 0 // no need on the followup
+	availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
+	job2.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU // require job1 to free resources
 	resp, _, err := nomadClient.Jobs().Register(job2, nil)
 	must.NoError(t, err)
 	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID2)
@@ -113,8 +89,8 @@ func TestOverlap(t *testing.T) {
 	// Wait for job1's ShutdownDelay for origAlloc.ClientStatus to go terminal
 	sleepyTime := minStopTime.Sub(time.Now())
 	if sleepyTime > 0 {
-		t.Logf("Sleeping for the rest of the shutdown_delay (%.3s/%s)",
-			sleepyTime, job1.TaskGroups[0].Tasks[0].ShutdownDelay)
+		t.Logf("Followup job %s blocked. Sleeping for the rest of %s's shutdown_delay (%.3s/%s)",
+			*job2.ID, *job1.ID, sleepyTime, job1.TaskGroups[0].Tasks[0].ShutdownDelay)
 		time.Sleep(sleepyTime)
 	}
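The last hunk above only rewords the log line, but it leans on timing that sits outside the hunks shown: minStopTime is presumably the moment job1 was deregistered plus its shutdown_delay, and the test sleeps out whatever remains of that window before it expects job1's allocation to go terminal and the blocked follow-up job to be placed. A minimal sketch of that arithmetic, with stopTime and shutdownDelay as illustrative names:

// Sketch of the wait the test performs; how minStopTime is actually
// computed is outside the hunks shown, so this is an assumption.
package overlap

import "time"

func remainingShutdownDelay(stopTime time.Time, shutdownDelay time.Duration) time.Duration {
	minStopTime := stopTime.Add(shutdownDelay) // earliest point job1's alloc can be terminal
	return time.Until(minStopTime)             // positive while the delay is still running
}

While that remainder is positive the follow-up job should stay blocked, since it was sized to need every CPU share job1 has not yet released; once it elapses the test expects the replacement to be placed.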


@@ -1,5 +1,5 @@
 job "overlap" {
-  datacenters = ["dc1"]
+  datacenters = ["dc1", "dc2"]
   type = "service"
   constraint {
@@ -7,11 +7,6 @@ job "overlap" {
     value = "linux"
   }
-  constraint {
-    attribute = "${node.unique.id}"
-    value = "<<Must be filled in by test>>"
-  }
   group "overlap" {
     count = 1
@@ -27,8 +22,7 @@ job "overlap" {
       }
       resources {
-        # Must be filled in by test
-        cpu = "0"
+        cpu = "500"
         memory = "50"
       }
     }