test: deflake node drain integration test (#18171)

The `TestDrainer_AllTypes_NoDeadline` test has been flaky. It looks like this
might be because the final update of batch allocations to complete is improperly
updating the state store directly rather than by RPC. If the service jobs have
restarted in the meantime, the `allocClientStateSimulator` will have updated the
index on the allocations table and that will prevent the drainer from
unblocking (and being marked complete) when the batch jobs are written with an
earlier index.

This changeset attempts to fix that by making the update via RPC (as it normally
would be in real code).
This commit is contained in:
Tim Gross 2023-08-14 16:17:25 -04:00
parent 577d96034d
commit a3a86a849a
1 changed file with 10 additions and 6 deletions

View File

@ -26,9 +26,9 @@ import (
)
// allocClientStateSimulator simulates the updates in state from the
// client. allocations that are new on the server get marked with healthy
// deployments, and allocations that are DesiredStatus=stop on the server get
// updates with terminal client status.
// client. service allocations that are new on the server get marked with
// healthy deployments, and service allocations that are DesiredStatus=stop on
// the server get updates with terminal client status.
func allocClientStateSimulator(t *testing.T, errCh chan<- error, ctx context.Context,
srv *Server, nodeID string, logger log.Logger) {
@ -529,9 +529,13 @@ func TestDrainer_AllTypes_NoDeadline(t *testing.T) {
new.ClientStatus = structs.AllocClientStatusComplete
updates = append(updates, new)
}
index, _ := store.LatestIndex()
index++
must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, index, updates))
batchDoneReq := &structs.AllocUpdateRequest{
Alloc: updates,
WriteRequest: structs.WriteRequest{Region: "global"},
}
err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", batchDoneReq, &resp)
must.NoError(t, err)
// Wait for the service allocations to be replaced
waitForPlacedAllocs(t, store, n2.ID, 3)