diff --git a/.changelog/18269.txt b/.changelog/18269.txt
new file mode 100644
index 000000000..435b5b875
--- /dev/null
+++ b/.changelog/18269.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+client: Ignore stale server updates to prevent GCing allocations that should be running
+```
diff --git a/client/client.go b/client/client.go
index 52747d6d8..c5a9a4250 100644
--- a/client/client.go
+++ b/client/client.go
@@ -2332,6 +2332,18 @@ OUTER:
 		default:
 		}
 
+		// We have not received any new data, or received stale data. This may happen in
+		// an array of situations, the worst of which seems to be a blocking request
+		// timeout when the scheduler which we are contacting is newly added or recovering
+		// after a prolonged downtime.
+		//
+		// For full context, please see https://github.com/hashicorp/nomad/issues/18267
+		if resp.Index <= req.MinQueryIndex {
+			c.logger.Debug("Received stale allocation information. Retrying.",
+				"index", resp.Index, "min_index", req.MinQueryIndex)
+			continue OUTER
+		}
+
 		// Filter all allocations whose AllocModifyIndex was not incremented.
 		// These are the allocations who have either not been updated, or whose
 		// updates are a result of the client sending an update for the alloc.