Improve drain log messages
Also delay the "node complete" message until shortly after the node has been marked complete, to capture a few more alloc events. There are other ways to implement this that trade correctness for responsiveness: technically a node is considered drained once all of its allocs have been marked to stop, not once they have actually stopped (which may not happen for a long time).
parent 5eebd53223
commit be7c759867
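The delayed exit described in the message above uses a common Go idiom: once the terminal "node done" event arrives, keep draining the alloc event channel and reset a short timer after every message, returning only after the channel has stayed quiet for the full deadline. Below is a minimal, self-contained sketch of that idiom, separate from the diff itself; the function name, channel wiring, and sample messages are hypothetical, while the 250ms deadline and the Stop/drain/Reset sequence mirror the change below.

package main

import (
	"fmt"
	"time"
)

// drainQuietly prints messages from msgCh until no message has arrived for
// the given deadline, then returns. This is a sketch of the grace period
// added in this commit; the function and channel names are hypothetical.
func drainQuietly(msgCh <-chan string, deadline time.Duration) {
	timer := time.NewTimer(deadline)
	defer timer.Stop()
	for {
		select {
		case msg := <-msgCh:
			fmt.Println(msg)
			// Stop may report the timer already fired; drain the stale
			// tick before Reset so it is not consumed on a later loop.
			if !timer.Stop() {
				<-timer.C
			}
			timer.Reset(deadline)
		case <-timer.C:
			// Quiet for a full deadline: assume no more events are coming.
			return
		}
	}
}

func main() {
	// Hypothetical late alloc events that arrive after "node complete".
	msgCh := make(chan string, 2)
	msgCh <- `Alloc "example" draining`
	msgCh <- `Alloc "example" status running -> complete`
	drainQuietly(msgCh, 250*time.Millisecond)
}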
@@ -7,6 +7,7 @@ import (
 	"github.com/hashicorp/nomad/api"
 	"github.com/hashicorp/nomad/api/contexts"
+	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/posener/complete"
 )
@@ -359,6 +360,8 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind
 			// Update local alloc state
 			initial[a.ID] = a
 
+			migrating := a.DesiredTransition.ShouldMigrate()
+
 			msg := ""
 			switch {
 			case !ok:
@@ -370,9 +373,15 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind
 				// Alloc status has changed; output
 				msg = fmt.Sprintf("status %s -> %s", orig.ClientStatus, a.ClientStatus)
 
-			case !orig.DesiredTransition.ShouldMigrate() && a.DesiredTransition.ShouldMigrate():
-				// Alloc marked for migration
+			case migrating && !orig.DesiredTransition.ShouldMigrate():
+				// Alloc was marked for migration
 				msg = "marked for migration"
+			case migrating && (orig.DesiredStatus != a.DesiredStatus) && a.DesiredStatus == structs.AllocDesiredStatusStop:
+				// Alloc has already been marked for migration and is now being stopped
+				msg = "draining"
+			case a.NextAllocation != "" && orig.NextAllocation == "":
+				// Alloc has been replaced by another allocation
+				msg = fmt.Sprintf("replaced by allocation %q", a.NextAllocation)
 			}
 
 			if msg != "" {
@@ -386,14 +395,36 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind
 			}
 		}
 	}()
 
-	for {
+	done := false
+	for !done {
 		select {
 		case err := <-errCh:
			return err
 		case <-nodeCh:
-			return nil
+			done = true
 		case msg := <-allocCh:
 			output(msg)
 		}
 	}
+
+	// Loop on alloc messages for a bit longer as we may have gotten the
+	// "node done" first (since the watchers run concurrently the events
+	// may be received out of order)
+	deadline := 250 * time.Millisecond
+	timer := time.NewTimer(deadline)
+	for {
+		select {
+		case err := <-errCh:
+			return err
+		case msg := <-allocCh:
+			output(msg)
+			if !timer.Stop() {
+				<-timer.C
+			}
+			timer.Reset(deadline)
+		case <-timer.C:
+			// No events within deadline, exit
+			return nil
+		}
+	}
 }
@@ -183,6 +183,7 @@ func TestNodeDrainCommand_Monitor(t *testing.T) {
 
 	require.Contains(out, "drain complete")
 	for _, a := range allocs {
 		require.Contains(out, fmt.Sprintf("Alloc %q marked for migration", a.ID))
+		require.Contains(out, fmt.Sprintf("Alloc %q draining", a.ID))
 	}
 }