diff --git a/.changelog/15372.txt b/.changelog/15372.txt
new file mode 100644
index 000000000..e44c391da
--- /dev/null
+++ b/.changelog/15372.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+csi: Fixed a bug where volumes in non-default namespaces could not be scheduled for system or sysbatch jobs
+```
diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go
index 14ae817d9..05a4ec6a2 100644
--- a/scheduler/scheduler_system_test.go
+++ b/scheduler/scheduler_system_test.go
@@ -13,6 +13,7 @@ import (
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/shoenig/test/must"
 	"github.com/stretchr/testify/require"
 )
 
@@ -2986,3 +2987,90 @@ func TestSystemSched_NodeDisconnected(t *testing.T) {
 		})
 	}
 }
+
+func TestSystemSched_CSITopology(t *testing.T) {
+	ci.Parallel(t)
+	h := NewHarness(t)
+
+	zones := []string{"zone-0", "zone-1", "zone-2", "zone-3"}
+
+	// Create some nodes, each running a CSI plugin with topology for
+	// a different "zone"
+	for i := 0; i < 12; i++ {
+		node := mock.Node()
+		node.Datacenter = zones[i%4]
+		node.CSINodePlugins = map[string]*structs.CSIInfo{
+			"test-plugin-" + zones[i%4]: {
+				PluginID: "test-plugin-" + zones[i%4],
+				Healthy:  true,
+				NodeInfo: &structs.CSINodeInfo{
+					MaxVolumes: 3,
+					AccessibleTopology: &structs.CSITopology{
+						Segments: map[string]string{"zone": zones[i%4]}},
+				},
+			},
+		}
+		must.NoError(t, h.State.UpsertNode(
+			structs.MsgTypeTestSetup, h.NextIndex(), node))
+	}
+
+	// Create a non-default namespace for the job and volume
+	ns := "non-default-namespace"
+	must.NoError(t, h.State.UpsertNamespaces(h.NextIndex(),
+		[]*structs.Namespace{{Name: ns}}))
+
+	// Create a volume that lives in one zone
+	vol0 := structs.NewCSIVolume("myvolume", 0)
+	vol0.PluginID = "test-plugin-zone-0"
+	vol0.Namespace = ns
+	vol0.AccessMode = structs.CSIVolumeAccessModeMultiNodeMultiWriter
+	vol0.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
+	vol0.RequestedTopologies = &structs.CSITopologyRequest{
+		Required: []*structs.CSITopology{{
+			Segments: map[string]string{"zone": "zone-0"},
+		}},
+	}
+
+	must.NoError(t, h.State.UpsertCSIVolume(
+		h.NextIndex(), []*structs.CSIVolume{vol0}))
+
+	// Create a job that uses that volume
+	job := mock.SystemJob()
+	job.Datacenters = zones
+	job.Namespace = ns
+	job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
+		"myvolume": {
+			Type:   "csi",
+			Name:   "unique",
+			Source: "myvolume",
+		},
+	}
+
+	must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   ns,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+
+	must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
+		h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and expect a single plan without annotations
+	err := h.Process(NewSystemScheduler, eval)
+	must.NoError(t, err)
+
+	must.Len(t, 1, h.Plans, must.Sprint("expected one plan"))
+	must.Nil(t, h.Plans[0].Annotations, must.Sprint("expected no annotations"))
+
+	// Expect that the eval has not spawned a blocked eval
+	must.Eq(t, len(h.CreateEvals), 0)
+	must.Eq(t, "", h.Evals[0].BlockedEval, must.Sprint("did not expect a blocked eval"))
+	must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status)
+
+}
diff --git a/scheduler/stack.go b/scheduler/stack.go
index b71916af0..6e362f637 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -300,6 +300,8 @@ func (s *SystemStack) SetJob(job *structs.Job) {
 	s.distinctPropertyConstraint.SetJob(job)
 	s.binPack.SetJob(job)
 	s.ctx.Eligibility().SetJob(job)
+	s.taskGroupCSIVolumes.SetNamespace(job.Namespace)
+	s.taskGroupCSIVolumes.SetJobID(job.ID)
 	if contextual, ok := s.quota.(ContextualIterator); ok {
 		contextual.SetJob(job)
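
The stack.go hunk is the actual fix: SystemStack.SetJob previously never told the task group's CSI volume checker which namespace (or job ID) it was evaluating, so for system and sysbatch jobs the volume lookup appears to have stayed scoped to the default/empty namespace and volumes registered anywhere else could not be found. The snippet below is a minimal, self-contained sketch of that failure mode under that assumption; the checker type, fields, and lookup are hypothetical stand-ins, not the Nomad scheduler source.

```go
package main

import "fmt"

// All identifiers below are hypothetical; they only illustrate why a
// namespace that is never propagated breaks the volume lookup.

// volumeKey scopes a volume lookup by namespace, mirroring in spirit how a
// CSI volume checker would resolve a task group's volume request.
type volumeKey struct {
	namespace string
	id        string
}

type csiVolumeChecker struct {
	namespace string
	volumes   map[volumeKey]bool
}

// SetNamespace plays the role of the call the fix adds to SystemStack.SetJob.
// If it is never called, every lookup stays scoped to the zero-value
// namespace and volumes registered in other namespaces are never found.
func (c *csiVolumeChecker) SetNamespace(ns string) { c.namespace = ns }

func (c *csiVolumeChecker) hasVolume(id string) bool {
	return c.volumes[volumeKey{namespace: c.namespace, id: id}]
}

func main() {
	c := &csiVolumeChecker{volumes: map[volumeKey]bool{
		{namespace: "non-default-namespace", id: "myvolume"}: true,
	}}

	// Before the fix: the namespace was never set on the checker, so a
	// volume living in a non-default namespace is invisible.
	fmt.Println(c.hasVolume("myvolume")) // false

	// After the fix: SetJob propagates job.Namespace and the lookup succeeds.
	c.SetNamespace("non-default-namespace")
	fmt.Println(c.hasVolume("myvolume")) // true
}
```

Under that reading, the new TestSystemSched_CSITopology exercises exactly this path: both the volume and the system job live in "non-default-namespace", and the eval can only complete without a blocked eval once SetJob passes the job's namespace through to the CSI volume checker.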