client: fix waiting on preempted alloc (#12779)
Fixes #10200 **The bug** A user reported receiving the following error when an alloc was placed that needed to preempt existing allocs: ``` [ERROR] client.alloc_watcher: error querying previous alloc: alloc_id=28... previous_alloc=8e... error="rpc error: alloc lookup failed: index error: UUID must be 36 characters" ``` The previous alloc (8e) was already complete on the client. This is possible if an alloc stops *after* the scheduling decision was made to preempt it, but *before* the node running both allocations was able to pull and start the preemptor. While that is hopefully a narrow window of time, you can expect it to occur in high-throughput, batch-scheduling-heavy systems. However, the RPC error made no sense! `previous_alloc` in the logs was a valid 36-character UUID! **The fix** The fix is: ``` - prevAllocID: c.Alloc.PreviousAllocation, + prevAllocID: watchedAllocID, ``` The alloc watcher constructor used for preemption improperly referenced Alloc.PreviousAllocation instead of the passed-in watchedAllocID. When multiple allocs are preempted, a watcher is created for each one, with watchedAllocID set properly by the caller. In this case Alloc.PreviousAllocation="" -- which is where the `UUID must be 36 characters` error was coming from! Sadly, we were properly referencing watchedAllocID in the log message, which made the error even more confusing! **The repro** I was able to reproduce this with a dev agent with [preemption enabled](https://gist.github.com/schmichael/53f79cbd898afdfab76865ad8c7fc6a0#file-preempt-hcl) and [lowered limits](https://gist.github.com/schmichael/53f79cbd898afdfab76865ad8c7fc6a0#file-limits-hcl) for ease of repro. First I started a [low-priority, count-3 job](https://gist.github.com/schmichael/53f79cbd898afdfab76865ad8c7fc6a0#file-preempt-lo-nomad), then a [high-priority job](https://gist.github.com/schmichael/53f79cbd898afdfab76865ad8c7fc6a0#file-preempt-hi-nomad) that preempts 2 low-priority allocs. Everything worked as expected. 
However, if I forced it to use the [remotePrevAlloc implementation](https://github.com/hashicorp/nomad/blob/v1.3.0-beta.1/client/allocwatcher/alloc_watcher.go#L147), it reproduced the bug, because the watcher referenced PreviousAllocation instead of watchedAllocID.
This commit is contained in:
parent
cfd353207f
commit
e2544dd089
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
client: Fixed a bug that could prevent a preempting alloc from ever starting.
|
||||
```
|
|
@ -102,7 +102,7 @@ func newMigratorForAlloc(c Config, tg *structs.TaskGroup, watchedAllocID string,
|
|||
migrate := tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate
|
||||
|
||||
if m != nil {
|
||||
// Local Allocation because there's no meta
|
||||
// Local Allocation because there's an alloc runner
|
||||
return &localPrevAlloc{
|
||||
allocID: c.Alloc.ID,
|
||||
prevAllocID: watchedAllocID,
|
||||
|
@ -117,7 +117,7 @@ func newMigratorForAlloc(c Config, tg *structs.TaskGroup, watchedAllocID string,
|
|||
|
||||
return &remotePrevAlloc{
|
||||
allocID: c.Alloc.ID,
|
||||
prevAllocID: c.Alloc.PreviousAllocation,
|
||||
prevAllocID: watchedAllocID,
|
||||
tasks: tasks,
|
||||
config: c.Config,
|
||||
migrate: migrate,
|
||||
|
@ -127,11 +127,17 @@ func newMigratorForAlloc(c Config, tg *structs.TaskGroup, watchedAllocID string,
|
|||
}
|
||||
}
|
||||
|
||||
// newWatcherForAlloc uses a local or rpc-based watcher depending on whether
|
||||
// AllocRunnerMeta is nil or not.
|
||||
//
|
||||
// Note that c.Alloc.PreviousAllocation must NOT be used in this func as it
|
||||
// is used for preemption, which has a distinct field. The caller is responsible
|
||||
// for passing the allocation to be watched as watchedAllocID.
|
||||
func newWatcherForAlloc(c Config, watchedAllocID string, m AllocRunnerMeta) PrevAllocWatcher {
|
||||
logger := c.Logger.Named("alloc_watcher").With("alloc_id", c.Alloc.ID).With("previous_alloc", watchedAllocID)
|
||||
|
||||
if m != nil {
|
||||
// Local Allocation because there's no meta
|
||||
// Local Allocation because there's an alloc runner
|
||||
return &localPrevAlloc{
|
||||
allocID: c.Alloc.ID,
|
||||
prevAllocID: watchedAllocID,
|
||||
|
@ -144,7 +150,7 @@ func newWatcherForAlloc(c Config, watchedAllocID string, m AllocRunnerMeta) Prev
|
|||
|
||||
return &remotePrevAlloc{
|
||||
allocID: c.Alloc.ID,
|
||||
prevAllocID: c.Alloc.PreviousAllocation,
|
||||
prevAllocID: watchedAllocID,
|
||||
config: c.Config,
|
||||
rpc: c.RPC,
|
||||
migrateToken: c.MigrateToken,
|
||||
|
@ -152,9 +158,12 @@ func newWatcherForAlloc(c Config, watchedAllocID string, m AllocRunnerMeta) Prev
|
|||
}
|
||||
}
|
||||
|
||||
// NewAllocWatcher creates a PrevAllocWatcher appropriate for whether this
|
||||
// alloc's previous allocation was local or remote. If this alloc has no
|
||||
// previous alloc then a noop implementation is returned.
|
||||
// NewAllocWatcher creates a PrevAllocWatcher if either PreviousAllocation or
|
||||
// PreemptedRunners are set. If any of the allocs to watch have local runners,
|
||||
// wait for them to terminate directly.
|
||||
// For allocs which are either running on another node or have already
|
||||
// terminated their alloc runners, use a remote backend which watches the alloc
|
||||
// status via rpc.
|
||||
func NewAllocWatcher(c Config) (PrevAllocWatcher, PrevAllocMigrator) {
|
||||
if c.Alloc.PreviousAllocation == "" && c.PreemptedRunners == nil {
|
||||
return NoopPrevAlloc{}, NoopPrevAlloc{}
|
||||
|
|
Loading…
Reference in New Issue