open-nomad/e2e/systemsched/systemsched.go

146 lines
4.0 KiB
Go

package systemsched
import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
// SystemSchedTest is the e2e test case for the system scheduler suite
// registered in init.
type SystemSchedTest struct {
// Embedded framework test-case base; supplies the Nomad() accessor the
// test methods use to obtain an API client.
framework.TC
// jobIDs records the IDs of jobs registered by a test so that AfterEach
// can deregister them.
jobIDs []string
}
// init registers the "SystemScheduler" suite, containing a single
// SystemSchedTest case, with the e2e framework.
func init() {
	suite := &framework.TestSuite{
		Component:   "SystemScheduler",
		CanRunLocal: true,
		Cases:       []framework.TestCase{&SystemSchedTest{}},
	}
	framework.AddSuites(suite)
}
// BeforeAll gates the suite on cluster readiness: it calls
// e2eutil.WaitForLeader and then e2eutil.WaitForNodesReady with a node count
// of 4 before any test runs.
func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
	t, client := f.T(), tc.Nomad()

	// A leader must exist before node readiness is checked.
	e2eutil.WaitForLeader(t, client)
	e2eutil.WaitForNodesReady(t, client, 4)
}
// TestJobUpdateOnIneligbleNode registers a system job, marks the node hosting
// one of its allocations as ineligible, and then registers an updated version
// of the job under the same ID. It asserts that the allocation on the
// ineligible node is still present at job version 0, and that every other
// running allocation is at job version 1.
func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()
	jobs := nomadClient.Jobs()

	jobID := "system_deployment"
	// Record the job so AfterEach deregisters it.
	tc.jobIDs = append(tc.jobIDs, jobID)
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job0.nomad", jobID, "")

	allocs, _, err := jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)
	// allocs[0] is indexed below; fail the test cleanly instead of panicking.
	require.NotEmpty(t, allocs, "expected at least one allocation for the job")

	// Wait for allocations to get past initial pending state
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocStubIDs(allocs))

	// Mark one node as ineligible
	disabledNodeID := allocs[0].NodeID
	_, err = nomadClient.Nodes().ToggleEligibility(disabledNodeID, false, nil)
	require.NoError(t, err)

	// Assert all jobs still running. The error is checked before the result
	// is consumed so a failed query is reported as such.
	allocs, _, err = jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)

	// Wait for allocs to run and collect allocs on ineligible node.
	// Allocation could have failed, ensure there is one that's running
	// and that it is the correct version (0)
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocStubIDs(allocs))
	allocForDisabledNode := make(map[string]*api.AllocationListStub)
	for _, alloc := range allocs {
		if alloc.NodeID == disabledNodeID {
			allocForDisabledNode[alloc.ID] = alloc
		}
	}

	// Filter down to only our latest running alloc. Deleting entries while
	// ranging over a map is permitted in Go.
	for _, alloc := range allocForDisabledNode {
		require.Equal(t, uint64(0), alloc.JobVersion)
		if alloc.ClientStatus == structs.AllocClientStatusComplete {
			// remove the old complete alloc from map
			delete(allocForDisabledNode, alloc.ID)
		}
	}
	require.Len(t, allocForDisabledNode, 1)

	// Update job
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job1.nomad", jobID, "")

	// Get updated allocations
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	// Wait for allocs to start
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocStubIDs(allocs))

	// Get latest alloc status now that they are no longer pending
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	var foundPreviousAlloc bool
	for _, dAlloc := range allocForDisabledNode {
		for _, alloc := range allocs {
			if alloc.ID == dAlloc.ID {
				// The alloc on the ineligible node must not have been updated.
				foundPreviousAlloc = true
				require.Equal(t, uint64(0), alloc.JobVersion)
			} else if alloc.ClientStatus == structs.AllocClientStatusRunning {
				// Ensure allocs running on non disabled node are
				// newer version
				require.Equal(t, uint64(1), alloc.JobVersion)
			}
		}
	}
	require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

// allocStubIDs returns the ID of every allocation stub in allocs, in order.
func allocStubIDs(allocs []*api.AllocationListStub) []string {
	ids := make([]string, 0, len(allocs))
	for _, alloc := range allocs {
		ids = append(ids, alloc.ID)
	}
	return ids
}
// AfterEach undoes the cluster changes a test may have made: it toggles every
// listed node back to eligible, deregisters (with purge) each job recorded in
// tc.jobIDs, clears that list, and triggers a garbage collection.
//
// Cleanup is best-effort: a failing step is logged rather than failing the
// test, and the remaining steps still run.
func (tc *SystemSchedTest) AfterEach(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()

	// Mark all nodes eligible
	nodesAPI := nomadClient.Nodes()
	nodes, _, err := nodesAPI.List(nil)
	if err != nil {
		t.Logf("cleanup: listing nodes: %v", err)
	}
	for _, node := range nodes {
		if _, err := nodesAPI.ToggleEligibility(node.ID, true, nil); err != nil {
			t.Logf("cleanup: marking node %q eligible: %v", node.ID, err)
		}
	}

	// Stop all jobs in test
	jobs := nomadClient.Jobs()
	for _, id := range tc.jobIDs {
		if _, _, err := jobs.Deregister(id, true, nil); err != nil {
			t.Logf("cleanup: deregistering job %q: %v", id, err)
		}
	}
	tc.jobIDs = nil

	// Garbage collect
	if err := nomadClient.System().GarbageCollect(); err != nil {
		t.Logf("cleanup: garbage collect: %v", err)
	}
}