test: simplify overlap job placement logic (#14811)
* test: simplify overlap job placement logic

  Attempts to fix #14806. Both the previous approach and this one worked on the e2e clusters I spun up.

* simplify code flow
parent bcd26f8815
commit bdb639b3e2
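In short, this change drops the up-front node scan: instead of polling the node list for a ready, eligible Linux node before registering anything, the test now registers job1 first, lets the scheduler pick a node, and reads that node back from job1's allocation. A minimal sketch of the new flow, using only the api/e2eutil calls that appear in the diff below (origAlloc, the allocation job1 receives, comes from an assertion loop the sketch elides):

    // Inside TestOverlap, after job1 is registered and origAlloc is known:
    // derive the target node from the allocation the scheduler chose.
    node, _, err := nomadClient.Nodes().Info(origAlloc.NodeID, nil)
    must.NoError(t, err)

    // Pin the follow-up job to that exact node and size it to the node's
    // entire unreserved CPU, so it cannot place while job1 still runs.
    job2, _ := getJob()
    job2.Constraints = append(job2.Constraints,
        api.NewConstraint("${node.unique.id}", "=", origAlloc.NodeID))
    availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
    job2.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU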
overlap_test.go:

@@ -20,49 +20,17 @@ func TestOverlap(t *testing.T) {
 	nomadClient := e2eutil.NomadClient(t)
 	e2eutil.WaitForLeader(t, nomadClient)
 
-	// Wait for at least 1 feasible node to be ready and get its ID
-	var node *api.Node
-	testutil.Wait(t, func() (bool, error) {
-		nodesList, _, err := nomadClient.Nodes().List(nil)
-		if err != nil {
-			return false, fmt.Errorf("error listing nodes: %v", err)
-		}
-
-		for _, n := range nodesList {
-			if n.Status != "ready" {
-				continue
-			}
-			if n.SchedulingEligibility != "eligible" {
-				continue
-			}
-
-			node, _, err = nomadClient.Nodes().Info(n.ID, nil)
-			must.NoError(t, err)
-
-			if node.Attributes["kernel.name"] != "linux" {
-				continue
-			}
-
-			return true, nil
-		}
-
-		return false, fmt.Errorf("no nodes ready before timeout; need at least 1 ready")
-	})
-
-	// Force job to fill one exact node
 	getJob := func() (*api.Job, string) {
 		job, err := e2eutil.Parse2(t, "testdata/overlap.nomad")
 		must.NoError(t, err)
 		jobID := *job.ID + uuid.Short()
 		job.ID = &jobID
-		job.Datacenters = []string{node.Datacenter}
-		job.Constraints[1].RTarget = node.ID
-		availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
-		job.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU
 		return job, *job.ID
 	}
 	job1, jobID1 := getJob()
 
+	// Register initial job that should block subsequent job's placement until
+	// its shutdown_delay is up.
 	_, _, err := nomadClient.Jobs().Register(job1, nil)
 	must.NoError(t, err)
 	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID1)
@@ -81,6 +49,10 @@ func TestOverlap(t *testing.T) {
 			origAlloc.ID, jobID1, origAlloc.ClientStatus)
 	})
 
+	// Capture node so we can ensure 2nd job is blocked by first
+	node, _, err := nomadClient.Nodes().Info(origAlloc.NodeID, nil)
+	must.NoError(t, err)
+
 	// Stop job but don't wait for ClientStatus terminal
 	_, _, err = nomadClient.Jobs().Deregister(jobID1, false, nil)
 	must.NoError(t, err)
@@ -94,9 +66,13 @@ func TestOverlap(t *testing.T) {
 			a.ID, ds, cs)
 	})
 
-	// Start replacement job and assert it is blocked
+	// Start replacement job on same node and assert it is blocked
 	job2, jobID2 := getJob()
+	job2.Constraints = append(job2.Constraints, api.NewConstraint("${node.unique.id}", "=", origAlloc.NodeID))
 	job2.TaskGroups[0].Tasks[0].ShutdownDelay = 0 // no need on the followup
+	availCPU := int(node.NodeResources.Cpu.CpuShares - int64(node.ReservedResources.Cpu.CpuShares))
+	job2.TaskGroups[0].Tasks[0].Resources.CPU = &availCPU // require job1 to free resources
+
 	resp, _, err := nomadClient.Jobs().Register(job2, nil)
 	must.NoError(t, err)
 	defer e2eutil.WaitForJobStopped(t, nomadClient, jobID2)
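The blocking mechanism above is arithmetic rather than any special API: job2 requests every unreserved CPU share on the node, so its placement is infeasible for as long as the scheduler still counts job1's allocation against that node. A worked example with assumed round numbers (not taken from a real node):

    // Assumed figures for illustration only.
    total := int64(4000)   // e.g. node.NodeResources.Cpu.CpuShares
    reserved := int64(100) // e.g. node.ReservedResources.Cpu.CpuShares
    availCPU := int(total - reserved) // 3900 MHz: all schedulable CPU

    // job2 asks for all 3900 MHz. While job1's allocation still holds its
    // 500 MHz (the jobspec default below), only 3400 MHz is free, so job2
    // stays blocked until job1's ClientStatus goes terminal.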
@@ -113,8 +89,8 @@ func TestOverlap(t *testing.T) {
 	// Wait for job1's ShutdownDelay for origAlloc.ClientStatus to go terminal
 	sleepyTime := minStopTime.Sub(time.Now())
 	if sleepyTime > 0 {
-		t.Logf("Sleeping for the rest of the shutdown_delay (%.3s/%s)",
-			sleepyTime, job1.TaskGroups[0].Tasks[0].ShutdownDelay)
+		t.Logf("Followup job %s blocked. Sleeping for the rest of %s's shutdown_delay (%.3s/%s)",
+			*job2.ID, *job1.ID, sleepyTime, job1.TaskGroups[0].Tasks[0].ShutdownDelay)
 		time.Sleep(sleepyTime)
 	}
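minStopTime is established earlier in the test, outside these hunks; the idiom here is simply "sleep whatever remains of the deadline" (time.Until(minStopTime) is equivalent to minStopTime.Sub(time.Now())). A hedged reconstruction, assuming minStopTime was captured as the deregister time plus job1's shutdown_delay:

    // Assumed off-screen setup: deadline = time of deregister + shutdown_delay.
    minStopTime := time.Now().Add(job1.TaskGroups[0].Tasks[0].ShutdownDelay)

    // ...deregister job1, assert job2's placement stays blocked...

    // Sleep only the remainder, if any, then assert job1 went terminal.
    if sleepyTime := time.Until(minStopTime); sleepyTime > 0 {
        time.Sleep(sleepyTime)
    }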
testdata/overlap.nomad:

@@ -1,5 +1,5 @@
 job "overlap" {
-  datacenters = ["dc1"]
+  datacenters = ["dc1", "dc2"]
   type        = "service"
 
   constraint {
@@ -7,11 +7,6 @@ job "overlap" {
     value     = "linux"
   }
 
-  constraint {
-    attribute = "${node.unique.id}"
-    value     = "<<Must be filled in by test>>"
-  }
-
   group "overlap" {
     count = 1
 
@@ -27,8 +22,7 @@ job "overlap" {
       }
 
       resources {
-        # Must be filled in by test
-        cpu    = "0"
+        cpu    = "500"
         memory = "50"
       }
     }