From 86576953223a4e3f11e621885e329cd177882667 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 23 Nov 2022 16:47:35 -0500 Subject: [PATCH] scheduler: set job on system stack for CSI feasibility check (#15372) When the scheduler checks feasibility of each node, it creates a "stack" which carries attributes of the job and task group it needs to check feasibility for. The `system` and `sysbatch` scheduler use a different stack than `service` and `batch` jobs. This stack was missing the call to set the job ID and namespace for the CSI check. This prevents CSI volumes from being scheduled for system jobs whenever the volume is in a non-default namespace. Set the job ID and namespace to match the generic scheduler. --- .changelog/15372.txt | 3 + scheduler/scheduler_system_test.go | 88 ++++++++++++++++++++++++++++++ scheduler/stack.go | 2 + 3 files changed, 93 insertions(+) create mode 100644 .changelog/15372.txt diff --git a/.changelog/15372.txt b/.changelog/15372.txt new file mode 100644 index 000000000..e44c391da --- /dev/null +++ b/.changelog/15372.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where volumes in non-default namespaces could not be scheduled for system or sysbatch jobs +``` diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index 14ae817d9..05a4ec6a2 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -2986,3 +2987,90 @@ func TestSystemSched_NodeDisconnected(t *testing.T) { }) } } + +func TestSystemSched_CSITopology(t *testing.T) { + ci.Parallel(t) + h := NewHarness(t) + + zones := []string{"zone-0", "zone-1", "zone-2", "zone-3"} + + // Create some nodes, each running a CSI plugin with topology for + // a different "zone" + for i := 0; i < 12; i++ { + node := mock.Node() + node.Datacenter = zones[i%4] + node.CSINodePlugins = map[string]*structs.CSIInfo{ + "test-plugin-" + zones[i%4]: { + PluginID: "test-plugin-" + zones[i%4], + Healthy: true, + NodeInfo: &structs.CSINodeInfo{ + MaxVolumes: 3, + AccessibleTopology: &structs.CSITopology{ + Segments: map[string]string{"zone": zones[i%4]}}, + }, + }, + } + must.NoError(t, h.State.UpsertNode( + structs.MsgTypeTestSetup, h.NextIndex(), node)) + } + + // create a non-default namespace for the job and volume + ns := "non-default-namespace" + must.NoError(t, h.State.UpsertNamespaces(h.NextIndex(), + []*structs.Namespace{{Name: ns}})) + + // create a volume that lives in one zone + vol0 := structs.NewCSIVolume("myvolume", 0) + vol0.PluginID = "test-plugin-zone-0" + vol0.Namespace = ns + vol0.AccessMode = structs.CSIVolumeAccessModeMultiNodeMultiWriter + vol0.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem + vol0.RequestedTopologies = &structs.CSITopologyRequest{ + Required: []*structs.CSITopology{{ + Segments: map[string]string{"zone": "zone-0"}, + }}, + } + + must.NoError(t, h.State.UpsertCSIVolume( + h.NextIndex(), []*structs.CSIVolume{vol0})) + + // Create a job that uses that volumes + job := mock.SystemJob() + job.Datacenters = zones + job.Namespace = ns + job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{ + "myvolume": { + Type: "csi", + Name: "unique", + Source: "myvolume", + }, + } + + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) + + // Create a mock evaluation to register the job + eval := &structs.Evaluation{ + Namespace: ns, + ID: uuid.Generate(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, + h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation and expect a single plan without annotations + err := h.Process(NewSystemScheduler, eval) + must.NoError(t, err) + + must.Len(t, 1, h.Plans, must.Sprint("expected one plan")) + must.Nil(t, h.Plans[0].Annotations, must.Sprint("expected no annotations")) + + // Expect the eval has not spawned a blocked eval + must.Eq(t, len(h.CreateEvals), 0) + must.Eq(t, "", h.Evals[0].BlockedEval, must.Sprint("did not expect a blocked eval")) + must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status) + +} diff --git a/scheduler/stack.go b/scheduler/stack.go index b71916af0..6e362f637 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -300,6 +300,8 @@ func (s *SystemStack) SetJob(job *structs.Job) { s.distinctPropertyConstraint.SetJob(job) s.binPack.SetJob(job) s.ctx.Eligibility().SetJob(job) + s.taskGroupCSIVolumes.SetNamespace(job.Namespace) + s.taskGroupCSIVolumes.SetJobID(job.ID) if contextual, ok := s.quota.(ContextualIterator); ok { contextual.SetJob(job)