scheduler: stopped-yet-running allocs are still running (#10446)
* scheduler: stopped-yet-running allocs are still running
* scheduler: test new stopped-but-running logic
* test: assert non-overlapping alloc behavior

  Also add a simpler Wait test helper to improve line numbers and save a few lines of code.

* docs: tried my best to describe #10446; it's not concise... feedback welcome
* scheduler: fix test that allowed overlapping allocs
* devices: only free devices when ClientStatus is terminal
* test: output nicer failure message if err == nil

Co-authored-by: Mahmood Ali <mahmood@hashicorp.com>
Co-authored-by: Michael Schurter <mschurter@hashicorp.com>
This commit is contained in: parent 984e2cc4f1, commit a9d5e4c510
@@ -0,0 +1,3 @@
+```release-note:bug
+core: prevent new allocations from overlapping execution with stopping allocations
+```
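For context, the distinction this change relies on can be sketched as follows. This is a simplified, self-contained illustration of the server-terminal versus client-terminal split; the real methods live on `structs.Allocation` in `nomad/structs`, and the string values here only mirror the `AllocDesiredStatus*` / `AllocClientStatus*` constants — it is not the actual implementation.

```go
package main

import "fmt"

// Simplified stand-in for structs.Allocation status fields.
type Allocation struct {
	DesiredStatus string // what the server wants: run, stop, evict
	ClientStatus  string // what the client reports: pending, running, complete, failed, lost
}

// ServerTerminalStatus: the server has decided the alloc should stop.
func (a *Allocation) ServerTerminalStatus() bool {
	return a.DesiredStatus == "stop" || a.DesiredStatus == "evict"
}

// ClientTerminalStatus: the client has reported the alloc actually stopped.
func (a *Allocation) ClientTerminalStatus() bool {
	switch a.ClientStatus {
	case "complete", "failed", "lost":
		return true
	}
	return false
}

// TerminalStatus is true if either side considers the alloc terminal. Before
// this change the scheduler used this check when deciding whether an alloc's
// resources were free, so a "stopping but still running" alloc looked free.
func (a *Allocation) TerminalStatus() bool {
	return a.ServerTerminalStatus() || a.ClientTerminalStatus()
}

func main() {
	stopping := &Allocation{DesiredStatus: "stop", ClientStatus: "running"}
	fmt.Println(stopping.TerminalStatus())       // true: looked free before the fix
	fmt.Println(stopping.ClientTerminalStatus()) // false: still holds its resources
}
```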
@@ -19,8 +19,10 @@ import (
 	"github.com/hashicorp/nomad/testutil"
 	"github.com/hashicorp/raft"
 	"github.com/kr/pretty"
+	"github.com/shoenig/test/must"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"golang.org/x/exp/slices"
 )

 func TestJobEndpoint_Register(t *testing.T) {
@@ -109,6 +111,168 @@ func TestJobEndpoint_Register(t *testing.T) {
 	}
 }

+// TestJobEndpoint_Register_NonOverlapping asserts that ClientStatus must be
+// terminal, not just DesiredStatus, for the resources used by a job to be
+// considered free for subsequent placements to use.
+//
+// See: https://github.com/hashicorp/nomad/issues/10440
+func TestJobEndpoint_Register_NonOverlapping(t *testing.T) {
+	ci.Parallel(t)
+
+	s1, cleanupS1 := TestServer(t, func(c *Config) {
+	})
+	defer cleanupS1()
+	state := s1.fsm.State()
+
+	// Create a mock node with easy to check resources
+	node := mock.Node()
+	node.Resources = nil // Deprecated in 0.9
+	node.NodeResources.Cpu.CpuShares = 700
+	must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1, node))
+
+	codec := rpcClient(t, s1)
+	testutil.WaitForLeader(t, s1.RPC)
+
+	// Create the register request
+	job := mock.Job()
+	job.TaskGroups[0].Count = 1
+	req := &structs.JobRegisterRequest{
+		Job: job.Copy(),
+		WriteRequest: structs.WriteRequest{
+			Region:    "global",
+			Namespace: job.Namespace,
+		},
+	}
+
+	// Fetch the response
+	var resp structs.JobRegisterResponse
+	must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp))
+	must.NonZero(t, resp.Index)
+
+	// Assert placement
+	jobReq := &structs.JobSpecificRequest{
+		JobID: job.ID,
+		QueryOptions: structs.QueryOptions{
+			Region:    "global",
+			Namespace: structs.DefaultNamespace,
+		},
+	}
+	var alloc *structs.AllocListStub
+	testutil.Wait(t, func() (bool, error) {
+		resp := structs.JobAllocationsResponse{}
+		must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Allocations", jobReq, &resp))
+		if n := len(resp.Allocations); n != 1 {
+			return false, fmt.Errorf("expected 1 allocation but found %d:\n%v", n, resp.Allocations)
+		}
+
+		alloc = resp.Allocations[0]
+		return true, nil
+	})
+	must.Eq(t, alloc.NodeID, node.ID)
+	must.Eq(t, alloc.DesiredStatus, structs.AllocDesiredStatusRun)
+	must.Eq(t, alloc.ClientStatus, structs.AllocClientStatusPending)
+
+	// Stop
+	stopReq := &structs.JobDeregisterRequest{
+		JobID:        job.ID,
+		Purge:        false,
+		WriteRequest: req.WriteRequest,
+	}
+	var stopResp structs.JobDeregisterResponse
+	must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Deregister", stopReq, &stopResp))
+
+	// Assert new register blocked
+	req.Job = job.Copy()
+	must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp))
+	must.NonZero(t, resp.Index)
+
+	blockedEval := ""
+	testutil.Wait(t, func() (bool, error) {
+		// Assert no new allocs
+		allocResp := structs.JobAllocationsResponse{}
+		must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Allocations", jobReq, &allocResp))
+		if n := len(allocResp.Allocations); n != 1 {
+			return false, fmt.Errorf("expected 1 allocation but found %d:\n%v", n, allocResp.Allocations)
+		}
+
+		if alloc.ID != allocResp.Allocations[0].ID {
+			return false, fmt.Errorf("unexpected change in alloc: %#v", *allocResp.Allocations[0])
+		}
+
+		eval, err := state.EvalByID(nil, resp.EvalID)
+		must.NoError(t, err)
+		if eval == nil {
+			return false, fmt.Errorf("eval not applied: %s", resp.EvalID)
+		}
+		if eval.Status != structs.EvalStatusComplete {
+			return false, fmt.Errorf("expected eval to be complete but found: %s", eval.Status)
+		}
+		if eval.BlockedEval == "" {
+			return false, fmt.Errorf("expected a blocked eval to be created")
+		}
+		blockedEval = eval.BlockedEval
+		return true, nil
+	})
+
+	// Set ClientStatus=complete like a client would
+	stoppedAlloc := &structs.Allocation{
+		ID:     alloc.ID,
+		NodeID: alloc.NodeID,
+		TaskStates: map[string]*structs.TaskState{
+			"web": &structs.TaskState{
+				State: structs.TaskStateDead,
+			},
+		},
+		ClientStatus:     structs.AllocClientStatusComplete,
+		DeploymentStatus: nil, // should not have an impact
+		NetworkStatus:    nil, // should not have an impact
+	}
+	upReq := &structs.AllocUpdateRequest{
+		Alloc:        []*structs.Allocation{stoppedAlloc},
+		WriteRequest: req.WriteRequest,
+	}
+	var upResp structs.GenericResponse
+	must.NoError(t, msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", upReq, &upResp))
+
+	// Assert newer register's eval unblocked
+	testutil.Wait(t, func() (bool, error) {
+		eval, err := state.EvalByID(nil, blockedEval)
+		must.NoError(t, err)
+		must.NotNil(t, eval)
+		if eval.Status != structs.EvalStatusComplete {
+			return false, fmt.Errorf("expected blocked eval to be complete but found: %s", eval.Status)
+		}
+		return true, nil
+	})
+
+	// Assert new alloc placed
+	testutil.Wait(t, func() (bool, error) {
+		// Assert no new allocs
+		allocResp := structs.JobAllocationsResponse{}
+		must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.Allocations", jobReq, &allocResp))
+		if n := len(allocResp.Allocations); n != 2 {
+			return false, fmt.Errorf("expected 2 allocs but found %d:\n%v", n, allocResp.Allocations)
+		}
+
+		slices.SortFunc(allocResp.Allocations, func(a, b *structs.AllocListStub) bool {
+			return a.CreateIndex < b.CreateIndex
+		})
+
+		if alloc.ID != allocResp.Allocations[0].ID {
+			return false, fmt.Errorf("unexpected change in alloc: %#v", *allocResp.Allocations[0])
+		}
+
+		if cs := allocResp.Allocations[0].ClientStatus; cs != structs.AllocClientStatusComplete {
+			return false, fmt.Errorf("expected old alloc to be complete but found: %s", cs)
+		}
+
+		if cs := allocResp.Allocations[1].ClientStatus; cs != structs.AllocClientStatusPending {
+			return false, fmt.Errorf("expected new alloc to be pending but found: %s", cs)
+		}
+		return true, nil
+	})
+}
+
 func TestJobEndpoint_Register_PreserveCounts(t *testing.T) {
 	ci.Parallel(t)
 	require := require.New(t)
@@ -61,7 +61,7 @@ func NewDeviceAccounter(n *Node) *DeviceAccounter {
 func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) {
 	for _, a := range allocs {
 		// Filter any terminal allocation
-		if a.TerminalStatus() {
+		if a.ClientTerminalStatus() {
 			continue
 		}
@@ -6,6 +6,7 @@ import (
 	"github.com/hashicorp/nomad/ci"
 	"github.com/hashicorp/nomad/helper/uuid"
 	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
+	"github.com/shoenig/test/must"
 	"github.com/stretchr/testify/require"
 )
@@ -162,6 +163,35 @@ func TestDeviceAccounter_AddAllocs_Collision(t *testing.T) {
 	require.True(d.AddAllocs(allocs))
 }

+// Assert that devices are not freed when an alloc's ServerTerminalStatus is
+// true, but only when ClientTerminalStatus is true.
+func TestDeviceAccounter_AddAllocs_TerminalStatus(t *testing.T) {
+	ci.Parallel(t)
+
+	n := devNode()
+	d := NewDeviceAccounter(n)
+
+	// Create two allocations, both with the same device. First is being told to
+	// stop but has not stopped yet.
+	a1, a2 := nvidiaAlloc(), nvidiaAlloc()
+	a1.DesiredStatus = AllocDesiredStatusStop
+	a1.ClientStatus = AllocClientStatusRunning
+
+	nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID
+	a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
+	a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
+
+	allocs := []*Allocation{a1, a2}
+
+	// Since a1 has not stopped on the client, its device is still in use
+	must.True(t, d.AddAllocs(allocs))
+
+	// Assert that stopping a1 on the client frees the device for use by a2
+	a1.ClientStatus = AllocClientStatusComplete
+	d = NewDeviceAccounter(n)
+	must.False(t, d.AddAllocs(allocs))
+}
+
 // Make sure that the device allocator works even if the node has no devices
 func TestDeviceAccounter_AddReserved_NoDeviceNode(t *testing.T) {
 	ci.Parallel(t)
@@ -173,7 +173,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 	// For each alloc, add the resources
 	for _, alloc := range allocs {
 		// Do not consider the resource impact of terminal allocations
-		if alloc.TerminalStatus() {
+		if alloc.ClientTerminalStatus() {
 			continue
 		}
@@ -267,6 +267,7 @@ func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
 	// Should fit second allocation since it is terminal
 	a2 := a1.Copy()
 	a2.DesiredStatus = AllocDesiredStatusStop
+	a2.ClientStatus = AllocClientStatusComplete
 	fit, _, used, err = AllocsFit(n, []*Allocation{a1, a2}, nil, false)
 	require.NoError(err)
 	require.True(fit)
@@ -494,6 +495,7 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
 	// Should fit second allocation since it is terminal
 	a2 := a1.Copy()
 	a2.DesiredStatus = AllocDesiredStatusStop
+	a2.ClientStatus = AllocClientStatusComplete
 	fit, dim, used, err := AllocsFit(n, []*Allocation{a1, a2}, nil, false)
 	require.NoError(err)
 	require.True(fit, dim)
@@ -501,6 +503,176 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
 	require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
 }

+// TestAllocsFit_ClientTerminalAlloc asserts that allocs which have a terminal
+// ClientStatus *do not* have their resources counted as in-use.
+func TestAllocsFit_ClientTerminalAlloc(t *testing.T) {
+	ci.Parallel(t)
+
+	n := &Node{
+		ID: "test-node",
+		NodeResources: &NodeResources{
+			Cpu: NodeCpuResources{
+				CpuShares: 2000,
+			},
+			Memory: NodeMemoryResources{
+				MemoryMB: 2048,
+			},
+			Disk: NodeDiskResources{
+				DiskMB: 10000,
+			},
+			Networks: []*NetworkResource{
+				{
+					Device: "eth0",
+					CIDR:   "10.0.0.0/8",
+					IP:     "10.0.0.1",
+					MBits:  100,
+				},
+			},
+		},
+		ReservedResources: &NodeReservedResources{
+			Cpu: NodeReservedCpuResources{
+				CpuShares: 1000,
+			},
+			Memory: NodeReservedMemoryResources{
+				MemoryMB: 1024,
+			},
+			Disk: NodeReservedDiskResources{
+				DiskMB: 5000,
+			},
+			Networks: NodeReservedNetworkResources{
+				ReservedHostPorts: "80",
+			},
+		},
+	}
+
+	liveAlloc := &Allocation{
+		ID:            "test-alloc-live",
+		ClientStatus:  AllocClientStatusPending,
+		DesiredStatus: AllocDesiredStatusRun,
+		AllocatedResources: &AllocatedResources{
+			Tasks: map[string]*AllocatedTaskResources{
+				"web": {
+					Cpu: AllocatedCpuResources{
+						CpuShares: 1000,
+					},
+					Memory: AllocatedMemoryResources{
+						MemoryMB: 1024,
+					},
+					Networks: []*NetworkResource{
+						{
+							Device:        "eth0",
+							IP:            "10.0.0.1",
+							MBits:         50,
+							ReservedPorts: []Port{{"main", 8000, 80, ""}},
+						},
+					},
+				},
+			},
+			Shared: AllocatedSharedResources{
+				DiskMB: 5000,
+			},
+		},
+	}
+
+	deadAlloc := liveAlloc.Copy()
+	deadAlloc.ID = "test-alloc-dead"
+	deadAlloc.ClientStatus = AllocClientStatusFailed
+	deadAlloc.DesiredStatus = AllocDesiredStatusRun
+
+	// *Should* fit both allocations since deadAlloc is not running on the
+	// client
+	fit, _, used, err := AllocsFit(n, []*Allocation{liveAlloc, deadAlloc}, nil, false)
+	require.NoError(t, err)
+	require.True(t, fit)
+	require.EqualValues(t, 1000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(t, 1024, used.Flattened.Memory.MemoryMB)
+}
+
+// TestAllocsFit_ServerTerminalAlloc asserts that allocs which have a terminal
+// DesiredStatus but are still running on clients *do* have their resources
+// counted as in-use.
+func TestAllocsFit_ServerTerminalAlloc(t *testing.T) {
+	ci.Parallel(t)
+
+	n := &Node{
+		ID: "test-node",
+		NodeResources: &NodeResources{
+			Cpu: NodeCpuResources{
+				CpuShares: 2000,
+			},
+			Memory: NodeMemoryResources{
+				MemoryMB: 2048,
+			},
+			Disk: NodeDiskResources{
+				DiskMB: 10000,
+			},
+			Networks: []*NetworkResource{
+				{
+					Device: "eth0",
+					CIDR:   "10.0.0.0/8",
+					IP:     "10.0.0.1",
+					MBits:  100,
+				},
+			},
+		},
+		ReservedResources: &NodeReservedResources{
+			Cpu: NodeReservedCpuResources{
+				CpuShares: 1000,
+			},
+			Memory: NodeReservedMemoryResources{
+				MemoryMB: 1024,
+			},
+			Disk: NodeReservedDiskResources{
+				DiskMB: 5000,
+			},
+			Networks: NodeReservedNetworkResources{
+				ReservedHostPorts: "80",
+			},
+		},
+	}
+
+	liveAlloc := &Allocation{
+		ID:            "test-alloc-live",
+		ClientStatus:  AllocClientStatusPending,
+		DesiredStatus: AllocDesiredStatusRun,
+		AllocatedResources: &AllocatedResources{
+			Tasks: map[string]*AllocatedTaskResources{
+				"web": {
+					Cpu: AllocatedCpuResources{
+						CpuShares: 1000,
+					},
+					Memory: AllocatedMemoryResources{
+						MemoryMB: 1024,
+					},
+					Networks: []*NetworkResource{
+						{
+							Device:        "eth0",
+							IP:            "10.0.0.1",
+							MBits:         50,
+							ReservedPorts: []Port{{"main", 8000, 80, ""}},
+						},
+					},
+				},
+			},
+			Shared: AllocatedSharedResources{
+				DiskMB: 5000,
+			},
+		},
+	}
+
+	deadAlloc := liveAlloc.Copy()
+	deadAlloc.ID = "test-alloc-dead"
+	deadAlloc.ClientStatus = AllocClientStatusRunning
+	deadAlloc.DesiredStatus = AllocDesiredStatusStop
+
+	// Should *not* fit both allocations since deadAlloc is still running
+	fit, _, used, err := AllocsFit(n, []*Allocation{liveAlloc, deadAlloc}, nil, false)
+	require.NoError(t, err)
+	require.False(t, fit)
+	require.EqualValues(t, 2000, used.Flattened.Cpu.CpuShares)
+	require.EqualValues(t, 2048, used.Flattened.Memory.MemoryMB)
+}
+
 // Tests that AllocsFit detects device collisions
 func TestAllocsFit_Devices(t *testing.T) {
 	ci.Parallel(t)
@@ -347,7 +347,7 @@ func (idx *NetworkIndex) SetNode(node *Node) error {
 func (idx *NetworkIndex) AddAllocs(allocs []*Allocation) (collide bool, reason string) {
 	for _, alloc := range allocs {
 		// Do not consider the resource impact of terminal allocations
-		if alloc.TerminalStatus() {
+		if alloc.ClientTerminalStatus() {
 			continue
 		}
@@ -7,6 +7,7 @@ import (
 	"testing"

 	"github.com/hashicorp/nomad/ci"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -201,6 +202,8 @@ func TestNetworkIndex_AddAllocs(t *testing.T) {
 	idx := NewNetworkIndex()
 	allocs := []*Allocation{
 		{
+			ClientStatus:  AllocClientStatusRunning,
+			DesiredStatus: AllocDesiredStatusRun,
 			AllocatedResources: &AllocatedResources{
 				Tasks: map[string]*AllocatedTaskResources{
 					"web": {
@@ -217,6 +220,8 @@ func TestNetworkIndex_AddAllocs(t *testing.T) {
 			},
 		},
 		{
+			ClientStatus:  AllocClientStatusRunning,
+			DesiredStatus: AllocDesiredStatusRun,
 			AllocatedResources: &AllocatedResources{
 				Tasks: map[string]*AllocatedTaskResources{
 					"api": {
@@ -232,24 +237,56 @@ func TestNetworkIndex_AddAllocs(t *testing.T) {
 				},
 			},
 		},
+		{
+			// Allocations running on clients should have their
+			// ports counted even if their DesiredStatus=stop
+			ClientStatus:  AllocClientStatusRunning,
+			DesiredStatus: AllocDesiredStatusStop,
+			AllocatedResources: &AllocatedResources{
+				Tasks: map[string]*AllocatedTaskResources{
+					"api": {
+						Networks: []*NetworkResource{
+							{
+								Device:        "eth0",
+								IP:            "192.168.0.100",
+								MBits:         50,
+								ReservedPorts: []Port{{"one", 10001, 0, ""}},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			// Allocations *not* running on clients should *not*
+			// have their ports counted even if their
+			// DesiredStatus=run
+			ClientStatus:  AllocClientStatusFailed,
+			DesiredStatus: AllocDesiredStatusRun,
+			AllocatedResources: &AllocatedResources{
+				Tasks: map[string]*AllocatedTaskResources{
+					"api": {
+						Networks: []*NetworkResource{
+							{
+								Device:        "eth0",
+								IP:            "192.168.0.100",
+								MBits:         50,
+								ReservedPorts: []Port{{"one", 10001, 0, ""}},
+							},
+						},
+					},
+				},
+			},
+		},
 	}
 	collide, reason := idx.AddAllocs(allocs)
-	if collide || reason != "" {
-		t.Fatalf("bad")
-	}
+	assert.False(t, collide)
+	assert.Empty(t, reason)

-	if idx.UsedBandwidth["eth0"] != 70 {
-		t.Fatalf("Bad")
-	}
-	if !idx.UsedPorts["192.168.0.100"].Check(8000) {
-		t.Fatalf("Bad")
-	}
-	if !idx.UsedPorts["192.168.0.100"].Check(9000) {
-		t.Fatalf("Bad")
-	}
-	if !idx.UsedPorts["192.168.0.100"].Check(10000) {
-		t.Fatalf("Bad")
-	}
+	assert.True(t, idx.UsedPorts["192.168.0.100"].Check(8000))
+	assert.True(t, idx.UsedPorts["192.168.0.100"].Check(9000))
+	assert.True(t, idx.UsedPorts["192.168.0.100"].Check(10000))
+	assert.True(t, idx.UsedPorts["192.168.0.100"].Check(10001))
 }

 func TestNetworkIndex_AddReserved(t *testing.T) {
@@ -173,7 +173,7 @@ func (e *EvalContext) Reset() {
 func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, error) {
 	// Get the existing allocations that are non-terminal
 	ws := memdb.NewWatchSet()
-	proposed, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
+	proposed, err := e.state.AllocsByNode(ws, nodeID)
 	if err != nil {
 		return nil, err
 	}
@@ -194,6 +194,10 @@ func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, erro
 	// update occurs, we do not double count and we override the old allocation.
 	proposedIDs := make(map[string]*structs.Allocation, len(proposed))
 	for _, alloc := range proposed {
+		if alloc.ClientTerminalStatus() {
+			continue
+		}
+
 		proposedIDs[alloc.ID] = alloc
 	}
 	for _, alloc := range e.plan.NodeAllocation[nodeID] {
@@ -1596,6 +1596,7 @@ func TestServiceSched_JobModify(t *testing.T) {
 		alloc.NodeID = nodes[i].ID
 		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
 		alloc.DesiredStatus = structs.AllocDesiredStatusStop
+		alloc.ClientStatus = structs.AllocClientStatusFailed // #10446
 		terminal = append(terminal, alloc)
 	}
 	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), terminal))
@@ -14,6 +14,28 @@ import (
 type testFn func() (bool, error)
 type errorFn func(error)

+func Wait(t *testing.T, test testFn) {
+	t.Helper()
+	retries := 500 * TestMultiplier()
+	for retries > 0 {
+		time.Sleep(10 * time.Millisecond)
+		retries--
+
+		success, err := test()
+		if success {
+			return
+		}
+
+		if retries == 0 {
+			if err == nil {
+				t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)")
+			} else {
+				t.Fatalf("timeout: %v", err)
+			}
+		}
+	}
+}
+
 func WaitForResult(test testFn, error errorFn) {
 	WaitForResultRetries(500*TestMultiplier(), test, error)
 }
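For reference, the new `testutil.Wait` helper takes the `*testing.T` directly (so timeout failures report the caller's line via `t.Helper()`), whereas `WaitForResult` needs a separate error callback. A minimal usage sketch follows; the test name and the time-based condition are made up for illustration and are not part of this change:

```go
package example

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/nomad/testutil"
)

func TestExample_UsesWait(t *testing.T) {
	start := time.Now()

	// Poll until the condition holds. On timeout Wait calls t.Fatalf with the
	// last returned error, so return a descriptive error rather than nil.
	testutil.Wait(t, func() (bool, error) {
		if time.Since(start) < 50*time.Millisecond {
			return false, fmt.Errorf("condition not met yet after %s", time.Since(start))
		}
		return true, nil
	})
}
```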
@@ -30,6 +30,25 @@ Audit Log filtering in previous versions of Nomad handled `stages` and
 would be filtered. As of 1.4.0, `stages` and `operations` are treated as `AND
 filters`. Logs will only be filtered if all filter conditions match.

+#### Prevent Overlapping New Allocations with Stopping Allocations
+
+Prior to Nomad 1.4.0 the scheduler would consider the resources used by
+allocations that are in the process of stopping to be free for new allocations
+to use. This could cause newer allocations to crash when they try to use TCP
+ports or memory used by an allocation in the process of stopping. The new and
+stopping [allocations would "overlap" improperly.][alloc_overlap]
+
+[Nomad 1.4.0 fixes this behavior][gh_10446] so that an allocation's resources
+are only considered free for reuse once the client node the allocation was
+running on reports it has stopped. Technically speaking: only once the
+`Allocation.ClientStatus` has reached a terminal state (`complete`, `failed`,
+or `lost`).
+
+Despite this being a bug fix, it is considered a significant enough change in
+behavior to reserve for a major Nomad release and *not* be backported. Please
+report any negative side effects encountered as [new
+issues.][gh_issue]
+
 ## Nomad 1.3.3

 Environments that don't support the use of [`uid`][template_uid] and
@@ -1471,3 +1490,6 @@ deleted and then Nomad 0.3.0 can be launched.
 [consul_acl]: https://github.com/hashicorp/consul/issues/7414
 [kill_timeout]: /docs/job-specification/task#kill_timeout
 [max_kill_timeout]: /docs/configuration/client#max_kill_timeout
+[alloc_overlap]: https://github.com/hashicorp/nomad/issues/10440
+[gh_10446]: https://github.com/hashicorp/nomad/pull/10446#issuecomment-1224833906
+[gh_issue]: https://github.com/hashicorp/nomad/issues/new/choose