diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 7275af61d..0df6f499e 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -471,6 +471,8 @@ func (c *CoreScheduler) deploymentGC(eval *structs.Evaluation) error { // Collect the deployments to GC var gcDeployment []string + +OUTER: for { raw := iter.Next() if raw == nil { @@ -483,6 +485,21 @@ func (c *CoreScheduler) deploymentGC(eval *structs.Evaluation) error { continue } + // Ensure there are no allocs referencing this deployment. + allocs, err := c.snap.AllocsByDeployment(ws, deploy.ID) + if err != nil { + c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for deployment %s: %v", + deploy.ID, err) + continue + } + + // Skip the deployment if any allocation referencing it is not terminal. + for _, alloc := range allocs { + if !alloc.TerminalStatus() { + continue OUTER + } + } + // Deployment is eligible for garbage collection gcDeployment = append(gcDeployment, deploy.ID) } diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 76da207dd..c6e0beabb 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -1325,12 +1325,19 @@ func TestCoreScheduler_DeploymentGC(t *testing.T) { // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) - // Insert terminal and active deployment + // Insert an active, terminal, and terminal with allocations deployment state := s1.fsm.State() - d1, d2 := mock.Deployment(), mock.Deployment() + d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment() d1.Status = structs.DeploymentStatusFailed + d3.Status = structs.DeploymentStatusSuccessful assert.Nil(state.UpsertDeployment(1000, d1), "UpsertDeployment") assert.Nil(state.UpsertDeployment(1001, d2), "UpsertDeployment") + assert.Nil(state.UpsertDeployment(1002, d3), "UpsertDeployment") + + a := mock.Alloc() + a.JobID = d3.JobID + a.DeploymentID = d3.ID + assert.Nil(state.UpsertAllocs(1003, 
[]*structs.Allocation{a}), "UpsertAllocs") // Update the time tables to make this work tt := s1.fsm.TimeTable() @@ -1353,6 +1360,9 @@ func TestCoreScheduler_DeploymentGC(t *testing.T) { out2, err := state.DeploymentByID(ws, d2.ID) assert.Nil(err, "DeploymentByID") assert.NotNil(out2, "Active Deployment") + out3, err := state.DeploymentByID(ws, d3.ID) + assert.Nil(err, "DeploymentByID") + assert.NotNil(out3, "Terminal Deployment With Allocs") } func TestCoreScheduler_DeploymentGC_Force(t *testing.T) { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index cd7ee3f0b..601999352 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2543,7 +2543,7 @@ func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *st return err } if deployment == nil { - return fmt.Errorf("allocation %q references unknown deployment %q", alloc.ID, alloc.DeploymentID) + return nil } // Retrieve the deployment state object